The attached patch adds improved patterns for "and with complement" to the s390 machine description. Bootstrapped and regression-tested on s390 and s390x.
Ciao Dominik ^_^ ^_^ -- Dominik Vogt IBM Germany
gcc/ChangeLog * config/s390/s390.h (REG_OR_SUBREG_P): New helper macro. * config/s390/s390.c (s390_expand_logical_operator): Force operands from memory into registers for expressions with register destination. (s390_logical_operator_si3_ok_p) (s390_andc_split_ok_p): New functions. * config/s390/s390-protos.h (s390_logical_operator_si3_ok_p) (s390_andc_split_ok_p): Add prototypes. * config/s390/s390.md ("*andc_split", "*andc_split2"): New splitters for and with complement. ("*andsi3_zarch", "*iorsi3_zarch", "xorsi3"): Call s390_logical_operator_si3_ok_p. gcc/testsuite/ChangeLog * gcc.target/s390/md/andc-splitter-1.c: New test case. * gcc.target/s390/md/andc-splitter-2.c: Likewise.
>From de225e02fe79661642f123fd0505a0bd60f20066 Mon Sep 17 00:00:00 2001 From: Dominik Vogt <v...@linux.vnet.ibm.com> Date: Mon, 14 Mar 2016 17:48:17 +0100 Subject: [PATCH] S/390: Add splitter for "and" with complement. Force splitting of logical operator expressions ... with three operands, a register destination and a memory operand because there are no instructions for that and combine results in inefficient code. --- gcc/config/s390/s390-protos.h | 2 + gcc/config/s390/s390.c | 65 ++++++++++++++++++++++ gcc/config/s390/s390.h | 3 + gcc/config/s390/s390.md | 52 ++++++++++++++++- gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c | 61 ++++++++++++++++++++ gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c | 38 +++++++++++++ 6 files changed, 218 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c create mode 100644 gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 2ccf0bb..8ba4d5d 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -127,6 +127,8 @@ extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx); extern void s390_expand_logical_operator (enum rtx_code, machine_mode, rtx *); extern bool s390_logical_operator_ok_p (rtx *); +extern bool s390_logical_operator_si3_ok_p (rtx *); +extern bool s390_andc_split_ok_p (rtx *); extern void s390_narrow_logical_operator (enum rtx_code, rtx *, rtx *); extern void s390_split_access_reg (rtx, rtx *, rtx *); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index cb5dd5f..1a303d8 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2558,6 +2558,27 @@ s390_expand_logical_operator (enum rtx_code code, machine_mode mode, src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0); } + /* We have no useful instructions with three operands, the source in memory + and the destination in a register. Reload memory operands to register if + necessary. 
*/ + if (!s390_logical_operator_si3_ok_p (operands)) + { + if (MEM_P (src1)) + { + rtx temp = gen_reg_rtx (mode); + + emit_move_insn (temp, src1); + src1 = temp; + } + if (MEM_P (src2)) + { + rtx temp = gen_reg_rtx (mode); + + emit_move_insn (temp, src2); + src2 = temp; + } + } + /* Emit the instruction. */ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2)); clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); @@ -2583,6 +2604,50 @@ s390_logical_operator_ok_p (rtx *operands) return true; } +/* Rejects operand combinations of logical operations (AND, IOR, XOR) that + result in less efficient code later. */ + +bool +s390_logical_operator_si3_ok_p (rtx *operands) +{ + if (!s390_logical_operator_ok_p (operands)) + return false; + if (reload_completed) + return true; + /* Reject three operand expressions with register destination if one of the + sources is a memory operand and the other is not a const_int operand. */ + if (REG_OR_SUBREG_P (operands[0]) + && (MEM_P (operands[1]) || MEM_P (operands[2])) + && !(CONST_INT_P (operands[1]) || CONST_INT_P (operands[2]))) + return false; + + return true; +} + +/* Rejects operand combinations of AND operations that result in less efficient + code later. */ + +bool +s390_andc_split_ok_p (rtx *operands) +{ + if (reload_completed) + return false; + if (!s390_logical_operator_si3_ok_p (operands)) + return false; + /* Reject two operand expressions with a memory destination that is identical + to one of the source operands and the other operand a register or memory + because the splitter would replace the destination with a register yielding + an undefined pattern. 
*/ + if (MEM_P (operands[0]) + && (MEM_P (operands[1]) || REG_OR_SUBREG_P (operands[1])) + && (MEM_P (operands[2]) || REG_OR_SUBREG_P (operands[2])) + && (rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))) + return false; + + return true; +} + /* Narrow logical operation CODE of memory operand MEMOP with immediate operand IMMOP to switch from SS to SI type instructions. */ diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 3a7be1a..19185ed 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -389,6 +389,9 @@ extern const char *s390_host_detect_local_cpu (int argc, const char **argv); #define ACCESS_REG_P(X) (REG_P (X) && ACCESS_REGNO_P (REGNO (X))) #define VECTOR_NOFP_REG_P(X) (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X))) #define VECTOR_REG_P(X) (REG_P (X) && VECTOR_REGNO_P (REGNO (X))) +#define REG_OR_SUBREG_P(x) \ + (REG_P ((x)) || (SUBREG_P ((x)) && REG_P (SUBREG_REG ((x))))) + /* Set up fixed registers and calling convention: diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 5f3b0f7..b120197 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -7100,7 +7100,7 @@ (match_operand:SI 2 "general_operand" " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxxSq,NxQSF,Q"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ZARCH && s390_logical_operator_ok_p (operands)" + "TARGET_ZARCH && s390_logical_operator_si3_ok_p (operands)" "@ # # @@ -7234,6 +7234,52 @@ (set_attr "z10prop" "z10_super_E1,z10_super,*")]) ; +; And with complement +; +; c = ~b & a = (b & a) ^ a + +(define_insn_and_split "*andc_split" + [(set (match_operand:GPR 0 "nonimmediate_operand" "") + (and:GPR (not:GPR (match_operand:GPR 1 "nonimmediate_operand" "")) + (match_operand:GPR 2 "general_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH && s390_andc_split_ok_p (operands)" + "#" + "&& 1" + [ + (parallel + [(set (match_dup 3) (and:GPR (match_dup 1) (match_dup 2))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set 
(match_dup 0) (xor:GPR (match_dup 3) (match_dup 2))) + (clobber (reg:CC CC_REGNUM))])] +{ + if (reg_overlap_mentioned_p (operands[0], operands[2])) + { + gcc_assert (can_create_pseudo_p ()); + operands[3] = gen_reg_rtx (<MODE>mode); + } + else + operands[3] = operands[0]; +}) + +; Convert "(xor (operand) (-1))" to "(not (operand))" for low optimization +; levels so that "*andc_split" matches. +(define_insn_and_split "*andc_split2" + [(set (match_operand:GPR 0 "nonimmediate_operand" "") + (and:GPR (xor:GPR (match_operand:GPR 1 "nonimmediate_operand" "") + (const_int -1)) + (match_operand:GPR 2 "general_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ZARCH && s390_andc_split_ok_p (operands)" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) (and:GPR (not:GPR (match_dup 1)) (match_dup 2))) + (clobber (reg:CC CC_REGNUM))])] +) + +; ; Block and (NC) patterns. ; @@ -7429,7 +7475,7 @@ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0, 0, 0,0,d,0,0, 0,0") (match_operand:SI 2 "general_operand" "N0HS0,N1HS0,Os,d,d,R,T,NxQS0,Q"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ZARCH && s390_logical_operator_ok_p (operands)" + "TARGET_ZARCH && s390_logical_operator_si3_ok_p (operands)" "@ oilh\t%0,%i2 oill\t%0,%i2 @@ -7751,7 +7797,7 @@ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, 0,0") (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxQS0,Q"))) (clobber (reg:CC CC_REGNUM))] - "s390_logical_operator_ok_p (operands)" + "s390_logical_operator_si3_ok_p (operands)" "@ xilf\t%0,%o2 xr\t%0,%2 diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c new file mode 100644 index 0000000..ed78921 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/md/andc-splitter-1.c @@ -0,0 +1,61 @@ +/* Machine description pattern tests. 
*/ + +/* { dg-do run { target { lp64 } } } */ +/* { dg-options "-mzarch -save-temps -dP" } */ +/* Skip test if -O0 is present on the command line: + + { dg-skip-if "" { *-*-* } { "-O0" } { "" } } + + Skip test if the -O option is missing from the command line + { dg-skip-if "" { *-*-* } { "*" } { "-O*" } } +*/ + +__attribute__ ((noinline)) +unsigned long andc_vv(unsigned long a, unsigned long b) +{ return ~b & a; } +/* { dg-final { scan-assembler ":15 .\* \{\\*anddi3\}" } } */ +/* { dg-final { scan-assembler ":15 .\* \{\\*xordi3\}" } } */ + +__attribute__ ((noinline)) +unsigned long andc_pv(unsigned long *a, unsigned long b) +{ return ~b & *a; } +/* { dg-final { scan-assembler ":21 .\* \{\\*anddi3\}" } } */ +/* { dg-final { scan-assembler ":21 .\* \{\\*xordi3\}" } } */ + +__attribute__ ((noinline)) +unsigned long andc_vp(unsigned long a, unsigned long *b) +{ return ~*b & a; } +/* { dg-final { scan-assembler ":27 .\* \{\\*anddi3\}" } } */ +/* { dg-final { scan-assembler ":27 .\* \{\\*xordi3\}" } } */ + +__attribute__ ((noinline)) +unsigned long andc_pp(unsigned long *a, unsigned long *b) +{ return ~*b & *a; } +/* { dg-final { scan-assembler ":33 .\* \{\\*anddi3\}" } } */ +/* { dg-final { scan-assembler ":33 .\* \{\\*xordi3\}" } } */ + +/* { dg-final { scan-assembler-times "\tngr\?k\?\t" 4 } } */ +/* { dg-final { scan-assembler-times "\txgr\?\t" 4 } } */ + +int +main (void) +{ + unsigned long a = 0xc00000000000000cllu; + unsigned long b = 0x500000000000000allu; + unsigned long e = 0x8000000000000004llu; + unsigned long c; + + c = andc_vv (a, b); + if (c != e) + __builtin_abort (); + c = andc_pv (&a, b); + if (c != e) + __builtin_abort (); + c = andc_vp (a, &b); + if (c != e) + __builtin_abort (); + c = andc_pp (&a, &b); + if (c != e) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c new file mode 100644 index 0000000..9e78335 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/s390/md/andc-splitter-2.c @@ -0,0 +1,38 @@ +/* Machine description pattern tests. */ + +/* { dg-do compile } */ +/* { dg-options "-mzarch -save-temps -dP" } */ +/* Skip test if -O0 is present on the command line: + + { dg-skip-if "" { *-*-* } { "-O0" } { "" } } + + Skip test if the -O option is missing from the command line + { dg-skip-if "" { *-*-* } { "*" } { "-O*" } } +*/ + +__attribute__ ((noinline)) +unsigned int andc_vv(unsigned int a, unsigned int b) +{ return ~b & a; } +/* { dg-final { scan-assembler ":15 .\* \{\\*andsi3_zarch\}" } } */ +/* { dg-final { scan-assembler ":15 .\* \{\\*xorsi3\}" } } */ + +__attribute__ ((noinline)) +unsigned int andc_pv(unsigned int *a, unsigned int b) +{ return ~b & *a; } +/* { dg-final { scan-assembler ":21 .\* \{\\*andsi3_zarch\}" } } */ +/* { dg-final { scan-assembler ":21 .\* \{\\*xorsi3\}" } } */ + +__attribute__ ((noinline)) +unsigned int andc_vp(unsigned int a, unsigned int *b) +{ return ~*b & a; } +/* { dg-final { scan-assembler ":27 .\* \{\\*andsi3_zarch\}" } } */ +/* { dg-final { scan-assembler ":27 .\* \{\\*xorsi3\}" } } */ + +__attribute__ ((noinline)) +unsigned int andc_pp(unsigned int *a, unsigned int *b) +{ return ~*b & *a; } +/* { dg-final { scan-assembler ":33 .\* \{\\*andsi3_zarch\}" } } */ +/* { dg-final { scan-assembler ":33 .\* \{\\*xorsi3\}" } } */ + +/* { dg-final { scan-assembler-times "\tnr\?k\?\t" 4 } } */ +/* { dg-final { scan-assembler-times "\txr\?k\?\t" 4 } } */ -- 2.3.0