[PATCH 2/2] xtensa: Fix RTL insn cost estimation about relaxed MOVI instructions
These instructions will all be converted to L32R ones with litpool entries by the assembler. gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_is_insn_L32R_p): Consider relaxed MOVI instructions as L32R. --- gcc/config/xtensa/xtensa.cc | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 2c534ff9c60..13f2b2b832c 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -4286,17 +4286,23 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) { rtx x = PATTERN (insn); - if (GET_CODE (x) == SET) + if (GET_CODE (x) != SET) +return false; + + x = XEXP (x, 1); + if (MEM_P (x)) { - x = SET_SRC (x); - if (MEM_P (x)) - { - x = XEXP (x, 0); - return (SYMBOL_REF_P (x) || CONST_INT_P (x)) -&& CONSTANT_POOL_ADDRESS_P (x); - } + x = XEXP (x, 0); + return (SYMBOL_REF_P (x) || CONST_INT_P (x)) +&& CONSTANT_POOL_ADDRESS_P (x); } + /* relaxed MOVI instructions, that will be converted to L32R by the + assembler. */ + if (CONST_INT_P (x) + && ! xtensa_simm12b (INTVAL (x))) +return true; + return false; } -- 2.20.1
[PATCH 1/2] xtensa: Apply a few minor fixes
No functional changes. gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_emit_move_sequence): Use can_create_pseudo_p(), instead of using individual reload_in_progress and reload_completed. (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), the existing predicate function. (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): Use the standard RTX code predicate macros such as MEM_P, SYMBOL_REF_P and/or CONST_INT_P. * config/xtensa/xtensa.md: Avoid using numeric literals to determine if callee-saved register, at the split patterns for indirect sibcall fixups. --- gcc/config/xtensa/xtensa.cc | 16 gcc/config/xtensa/xtensa.md | 8 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index c5d00acdf2c..2c534ff9c60 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -752,7 +752,7 @@ gen_int_relational (enum rtx_code test_code, /* relational test (EQ, etc) */ } /* See if we need to invert the result. */ - invert = ((GET_CODE (cmp1) == CONST_INT) + invert = (CONST_INT_P (cmp1) ? p_info->invert_const : p_info->invert_reg); @@ -1209,7 +1209,7 @@ xtensa_emit_move_sequence (rtx *operands, machine_mode mode) } } - if (!(reload_in_progress | reload_completed) + if (can_create_pseudo_p () && !xtensa_valid_move (mode, operands)) operands[1] = force_reg (mode, operands[1]); @@ -1612,7 +1612,7 @@ xtensa_expand_block_set_small_loop (rtx *operands) thus limited to only offset to the end address for ADDI/ADDMI instruction. */ if (align == 4 - && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) + && ! 
(bytes <= 127 || xtensa_simm8x256 (bytes))) return 0; /* If no 4-byte aligned, loop count should be treated as the @@ -2169,7 +2169,7 @@ xtensa_emit_sibcall (int callop, rtx *operands) static char result[64]; rtx tgt = operands[callop]; - if (GET_CODE (tgt) == CONST_INT) + if (CONST_INT_P (tgt)) sprintf (result, "j.l\t" HOST_WIDE_INT_PRINT_HEX ", a9", INTVAL (tgt)); else if (register_operand (tgt, VOIDmode)) @@ -4282,17 +4282,17 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, } static bool -xtensa_is_insn_L32R_p(const rtx_insn *insn) +xtensa_is_insn_L32R_p (const rtx_insn *insn) { rtx x = PATTERN (insn); if (GET_CODE (x) == SET) { - x = XEXP (x, 1); - if (GET_CODE (x) == MEM) + x = SET_SRC (x); + if (MEM_P (x)) { x = XEXP (x, 0); - return (GET_CODE (x) == SYMBOL_REF || CONST_INT_P (x)) + return (SYMBOL_REF_P (x) || CONST_INT_P (x)) && CONSTANT_POOL_ADDRESS_P (x); } } diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index ef6bbc451b0..84b975cf00e 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -1246,14 +1246,14 @@ int i = 0; rtx x = XEXP (operands[1], 0); long l[2]; - if (GET_CODE (x) == SYMBOL_REF + if (SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x)) x = get_pool_constant (x); else if (GET_CODE (x) == CONST) { x = XEXP (x, 0); gcc_assert (GET_CODE (x) == PLUS - && GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && SYMBOL_REF_P (XEXP (x, 0)) && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))); i = INTVAL (XEXP (x, 1)); @@ -2212,7 +2212,7 @@ (match_operand 1 ""))] "reload_completed && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) - && IN_RANGE (REGNO (operands[0]), 12, 15)" + && ! call_used_or_fixed_reg_p (REGNO (operands[0]))" [(set (reg:SI A10_REG) (match_dup 0)) (call (mem:SI (reg:SI A10_REG)) @@ -2245,7 +2245,7 @@ (match_operand 2 "")))] "reload_completed && !TARGET_WINDOWED_ABI && SIBLING_CALL_P (insn) - && IN_RANGE (REGNO (operands[1]), 12, 15)" + && ! 
call_used_or_fixed_reg_p (REGNO (operands[1]))" [(set (reg:SI A10_REG) (match_dup 1)) (set (match_dup 0) -- 2.20.1
Re: [PATCH 1/2] xtensa: Apply a few minor fixes
On Sun, Jun 19, 2022 at 12:15 PM Takayuki 'January June' Suwa wrote: > > No functional changes. > > gcc/ChangeLog: > > * config/xtensa/xtensa.cc (xtensa_emit_move_sequence): > Use can_create_pseudo_p(), instead of using individual > reload_in_progress and reload_completed. > (xtensa_expand_block_set_small_loop): Use xtensa_simm8x256(), > the existing predicate function. > (xtensa_is_insn_L32R_p, gen_int_relational, xtensa_emit_sibcall): > Use the standard RTX code predicate macros such as MEM_P, > SYMBOL_REF_P and/or CONST_INT_P. > * config/xtensa/xtensa.md: Avoid using numeric literals to determine > if callee-saved register, at the split patterns for indirect sibcall > fixups. > --- > gcc/config/xtensa/xtensa.cc | 16 > gcc/config/xtensa/xtensa.md | 8 > 2 files changed, 12 insertions(+), 12 deletions(-) Regtested for target=xtensa-linux-uclibc, no new regressions. Committed to master. -- Thanks. -- Max
Re: [PATCH 2/2] xtensa: Fix RTL insn cost estimation about relaxed MOVI instructions
On Sun, Jun 19, 2022 at 12:15 PM Takayuki 'January June' Suwa wrote: > > These instructions will all be converted to L32R ones with litpool entries > by the assembler. > > gcc/ChangeLog: > > * config/xtensa/xtensa.cc (xtensa_is_insn_L32R_p): > Consider relaxed MOVI instructions as L32R. > --- > gcc/config/xtensa/xtensa.cc | 22 ++ > 1 file changed, 14 insertions(+), 8 deletions(-) Regtested for target=xtensa-linux-uclibc, no new regressions. Committed to master. -- Thanks. -- Max
[ping][PATCH v1.1] tree-optimization/105736: Don't let error_mark_node escape for ADDR_EXPR
Hello, ping! On 14/06/2022 21:01, Siddhesh Poyarekar wrote: The addr_expr computation does not check for error_mark_node before returning the size expression. This used to work in the constant case because the conversion to uhwi would end up causing it to return size_unknown, but that won't work for the dynamic case. Modify the control flow to explicitly return size_unknown if the offset computation returns an error_mark_node. gcc/ChangeLog: PR tree-optimization/105736 * tree-object-size.cc (addr_object_size): Return size_unknown when object offset computation returns an error. gcc/testsuite/ChangeLog: PR tree-optimization/105736 * gcc.dg/builtin-dynamic-object-size-0.c (TV4, val3, test_pr105736): New struct declaration, variable and function to test PR. (main): Use them. Signed-off-by: Siddhesh Poyarekar --- Changes from v1: - Used FAIL() instead of __builtin_abort() in the test. Tested: - x86_64 bootstrap and test - --with-build-config=bootstrap-ubsan build May I also backport this to gcc12? .../gcc.dg/builtin-dynamic-object-size-0.c| 18 + gcc/tree-object-size.cc | 20 ++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c index b5b0b3a677c..01a280b2d7b 100644 --- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c +++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c @@ -479,6 +479,20 @@ test_loop (int *obj, size_t sz, size_t start, size_t end, int incr) return __builtin_dynamic_object_size (ptr, 0); } +/* Other tests. */ + +struct TV4 +{ + __attribute__((vector_size (sizeof (int) * 4))) int v; +}; + +struct TV4 val3; +int * +test_pr105736 (struct TV4 *a) +{ + return &a->v[0]; +} + unsigned nfails = 0; #define FAIL() ({ \ @@ -633,6 +647,10 @@ main (int argc, char **argv) FAIL (); if (test_loop (arr, 42, 20, 52, 1) != 0) FAIL (); + /* pr105736. 
*/ + int *t = test_pr105736 (&val3); + if (__builtin_dynamic_object_size (t, 0) != -1) +FAIL (); if (nfails > 0) __builtin_abort (); diff --git a/gcc/tree-object-size.cc b/gcc/tree-object-size.cc index 5ca87ae3504..12bc0868b77 100644 --- a/gcc/tree-object-size.cc +++ b/gcc/tree-object-size.cc @@ -695,19 +695,21 @@ addr_object_size (struct object_size_info *osi, const_tree ptr, var_size = pt_var_size; bytes = compute_object_offset (TREE_OPERAND (ptr, 0), var); if (bytes != error_mark_node) - bytes = size_for_offset (var_size, bytes); - if (var != pt_var - && pt_var_size - && TREE_CODE (pt_var) == MEM_REF - && bytes != error_mark_node) { - tree bytes2 = compute_object_offset (TREE_OPERAND (ptr, 0), pt_var); - if (bytes2 != error_mark_node) + bytes = size_for_offset (var_size, bytes); + if (var != pt_var && pt_var_size && TREE_CODE (pt_var) == MEM_REF) { - bytes2 = size_for_offset (pt_var_size, bytes2); - bytes = size_binop (MIN_EXPR, bytes, bytes2); + tree bytes2 = compute_object_offset (TREE_OPERAND (ptr, 0), + pt_var); + if (bytes2 != error_mark_node) + { + bytes2 = size_for_offset (pt_var_size, bytes2); + bytes = size_binop (MIN_EXPR, bytes, bytes2); + } } } + else + bytes = size_unknown (object_size_type); wholebytes = object_size_type & OST_SUBOBJECT ? var_size : pt_var_wholesize;
[PATCH v5, rs6000] Implemented f[min/max]_optab by xs[min/max]dp [PR103605]
Hi, This patch implements optab f[min/max]_optab by xs[min/max]dp on rs6000. Tests show that outputs of xs[min/max]dp are consistent with the standard of C99 fmin/max. This patch also binds __builtin_vsx_xs[min/max]dp to fmin/max instead of smin/max. So the builtins always generate xs[min/max]dp on all platforms. Compared with the previous version, I added a condition check for finite_math_only in fmin/max insn. Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-06-20 Haochen Gui gcc/ PR target/103605 * config/rs6000/rs6000-builtins.def (__builtin_vsx_xsmaxdp): Bind to fmaxdf3 instead of smaxdf3. (__builtin_vsx_xsmindp): Bind to fmindf3 instead of smindf3. * config/rs6000/rs6000.md (UNSPEC_FMAX, UNSPEC_FMIN): New unspecs. (FMINMAX): New int iterator. (minmax_op): New int attribute. (f<minmax_op><mode>3): New insn pattern implemented by xs[min/max]dp. gcc/testsuite/ PR target/103605 * gcc.target/powerpc/pr103605.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index f4a9f24bcc5..8b735493b40 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -1613,10 +1613,10 @@ XSCVSPDP vsx_xscvspdp {} const double __builtin_vsx_xsmaxdp (double, double); -XSMAXDP smaxdf3 {} +XSMAXDP fmaxdf3 {} const double __builtin_vsx_xsmindp (double, double); -XSMINDP smindf3 {} +XSMINDP fmindf3 {} const double __builtin_vsx_xsrdpi (double); XSRDPI vsx_xsrdpi {} diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bf85baa5370..ae0dd98f0f9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -158,6 +158,8 @@ (define_c_enum "unspec" UNSPEC_HASHCHK UNSPEC_XXSPLTIDP_CONST UNSPEC_XXSPLTIW_CONST + UNSPEC_FMAX + UNSPEC_FMIN ]) ;; @@ -5341,6 +5343,22 @@ (define_insn_and_split "*s<minmax><mode>3_fpr" DONE; }) + +(define_int_iterator FMINMAX [UNSPEC_FMAX UNSPEC_FMIN]) + +(define_int_attr minmax_op [(UNSPEC_FMAX "max") +(UNSPEC_FMIN "min")]) + +(define_insn "f<minmax_op><mode>3" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") + (match_operand:SFDF 2 
"vsx_register_operand" "wa")] +FMINMAX))] + "TARGET_VSX && !flag_finite_math_only" + "xs<minmax_op>dp %x0,%x1,%x2" + [(set_attr "type" "fp")] +) + (define_expand "mov<mode>cc" [(set (match_operand:GPR 0 "gpc_reg_operand") (if_then_else:GPR (match_operand 1 "comparison_operator") diff --git a/gcc/testsuite/gcc.target/powerpc/pr103605.c b/gcc/testsuite/gcc.target/powerpc/pr103605.c new file mode 100644 index 000..e43ac40c2d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103605.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O1 -mvsx" } */ +/* { dg-final { scan-assembler-times {\mxsmaxdp\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mxsmindp\M} 3 } } */ + +#include <math.h> + +double test1 (double d0, double d1) +{ + return fmin (d0, d1); +} + +float test2 (float d0, float d1) +{ + return fmin (d0, d1); +} + +double test3 (double d0, double d1) +{ + return fmax (d0, d1); +} + +float test4 (float d0, float d1) +{ + return fmax (d0, d1); +} + +double test5 (double d0, double d1) +{ + return __builtin_vsx_xsmindp (d0, d1); +} + +double test6 (double d0, double d1) +{ + return __builtin_vsx_xsmaxdp (d0, d1); +}