[gcc r15-1804] tree-optimization/115764 - testcase for BB SLP issue
https://gcc.gnu.org/g:2be2145f4f14a79e4bb8e845168d7f0d25dc1b5b commit r15-1804-g2be2145f4f14a79e4bb8e845168d7f0d25dc1b5b Author: Richard Biener Date: Wed Jul 3 09:05:06 2024 +0200 tree-optimization/115764 - testcase for BB SLP issue The following adds a testcase for a CSE issue with BB SLP two operator handling when we make those CSE aware by providing SLP_TREE_SCALAR_STMTS for them. This was reduced from 526.blender_r. PR tree-optimization/115764 * gcc.dg/vect/bb-slp-76.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/bb-slp-76.c | 30 ++ 1 file changed, 30 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-76.c b/gcc/testsuite/gcc.dg/vect/bb-slp-76.c new file mode 100644 index 000..b3b6a58e7c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-76.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffast-math" } */ + +typedef struct { + float xmin, xmax; +} rctf; +int U_0; +float BLI_rctf_size_x_rct_1, view_zoomdrag_apply_dx; +void *view_zoomdrag_apply_op_0; +float RNA_float_get(); +typedef struct { + rctf cur; +} View2D; +typedef struct { + View2D v2d; +} v2dViewZoomData; +void view_zoomdrag_apply() { + v2dViewZoomData *vzd = view_zoomdrag_apply_op_0; + View2D *v2d = &vzd->v2d; + view_zoomdrag_apply_dx = RNA_float_get(); + if (U_0) { +float mval_fac = BLI_rctf_size_x_rct_1, mval_faci = mval_fac, + ofs = mval_faci * view_zoomdrag_apply_dx; +v2d->cur.xmin += ofs + view_zoomdrag_apply_dx; +v2d->cur.xmax += ofs - view_zoomdrag_apply_dx; + } else { +v2d->cur.xmin += view_zoomdrag_apply_dx; +v2d->cur.xmax -= view_zoomdrag_apply_dx; + } +}
[gcc r15-1805] RISC-V: Fix asm check failure for truncated after SAT_SUB
https://gcc.gnu.org/g:ab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318 commit r15-1805-gab3e3d2f0564c2eb0640de3f4d0a50e1fcc8c318 Author: Pan Li Date: Wed Jul 3 13:17:16 2024 +0800 RISC-V: Fix asm check failure for truncated after SAT_SUB It seems that the asm check is incorrect for truncated after SAT_SUB, we should take the vx check for vssubu instead of vv check. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Update vssubu check from vv to vx. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c index dd9e3999a29..1e380657d74 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma ** ... ** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e8,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ... 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c index 738d1465a01..d7b8931f0ec 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma ** ... ** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e16,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ... diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c index b008b21cf0c..edf42a1f776 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c @@ -11,7 +11,7 @@ ** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma ** ... ** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\) -** vssubu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+ +** vssubu\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[atx][0-9]+ ** vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*ta,\s*ma ** vncvt\.x\.x\.w\s+v[0-9]+,\s*v[0-9]+ ** ...
[gcc r15-1806] Move runtime check into a separate function and guard it with target ("no-avx")
https://gcc.gnu.org/g:239ad907b1fc08874042f8bea5f61eaf3ba2877d commit r15-1806-g239ad907b1fc08874042f8bea5f61eaf3ba2877d Author: liuhongt Date: Wed Jul 3 14:47:33 2024 +0800 Move runtime check into a separate function and guard it with target ("no-avx") The patch can avoid SIGILL on non-AVX512 machine due to kmovd is generated in dynamic check. gcc/testsuite/ChangeLog: PR target/115748 * gcc.target/i386/avx512-check.h: Move runtime check into a separate function and guard it with target ("no-avx"). Diff: --- gcc/testsuite/gcc.target/i386/avx512-check.h | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 0ad9064f637..71858a33dac 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -34,8 +34,9 @@ check_osxsave (void) return (ecx & bit_OSXSAVE) != 0; } +__attribute__((noipa,target("no-avx"))) int -main () +avx512_runtime_support_p () { unsigned int eax, ebx, ecx, edx; @@ -100,6 +101,17 @@ main () && (edx & bit_AVX512VP2INTERSECT) #endif && avx512f_os_support ()) +{ + return 1; +} + + return 0; +} + +int +main () +{ + if (avx512_runtime_support_p ()) { DO_TEST (); #ifdef DEBUG
[gcc r15-1807] Give fast DCE a separate dirty flag
https://gcc.gnu.org/g:47ea6bddd15a568cedc5d7026d2cc9d5599e6e01 commit r15-1807-g47ea6bddd15a568cedc5d7026d2cc9d5599e6e01 Author: Richard Sandiford Date: Wed Jul 3 09:17:42 2024 +0100 Give fast DCE a separate dirty flag Thomas pointed out that we sometimes failed to eliminate some dead code (specifically clobbers of otherwise unused registers) on nvptx when late-combine is enabled. This happens because: - combine is able to optimise the function in a way that exposes dead code. This leaves the df information in a "dirty" state. - late_combine calls df_analyze without DF_LR_RUN_DCE set. This updates the df information and clears the "dirty" state. - late_combine doesn't find any extra optimisations, and so leaves the df information up-to-date. - if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set. Because the df information is already up-to-date, fast DCE is not run. The upshot is that running late-combine has the effect of suppressing a DCE opportunity that would have been noticed without late_combine. I think this shows that we should track the state of the DCE separately from the LR problem. Every pass updates the latter, but not all passes update the former. gcc/ * df.h (DF_LR_DCE): New df_problem_id. (df_lr_dce): New macro. * df-core.cc (rest_of_handle_df_finish): Check for a null free_fun. * df-problems.cc (df_lr_finalize): Split out fast DCE handling to... (df_lr_dce_finalize): ...this new function. (problem_LR_DCE): New df_problem. (df_lr_add_problem): Register LR_DCE rather than LR itself. * dce.cc (fast_dce): Clear df_lr_dce->solutions_dirty. 
Diff: --- gcc/dce.cc | 3 ++ gcc/df-core.cc | 3 +- gcc/df-problems.cc | 96 +- gcc/df.h | 2 ++ 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/gcc/dce.cc b/gcc/dce.cc index be1a2a87732..04e8d98818d 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -1182,6 +1182,9 @@ fast_dce (bool word_level) BITMAP_FREE (processed); BITMAP_FREE (redo_out); BITMAP_FREE (all_blocks); + + /* Both forms of DCE should make further DCE unnecessary. */ + df_lr_dce->solutions_dirty = false; } diff --git a/gcc/df-core.cc b/gcc/df-core.cc index b0e8a88d433..8fd778a8618 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -806,7 +806,8 @@ rest_of_handle_df_finish (void) for (i = 0; i < df->num_problems_defined; i++) { struct dataflow *dflow = df->problems_in_order[i]; - dflow->problem->free_fun (); + if (dflow->problem->free_fun) + dflow->problem->free_fun (); } free (df->postorder); diff --git a/gcc/df-problems.cc b/gcc/df-problems.cc index 88ee0dd67fc..bfd24bd1e86 100644 --- a/gcc/df-problems.cc +++ b/gcc/df-problems.cc @@ -1054,37 +1054,10 @@ df_lr_transfer_function (int bb_index) } -/* Run the fast dce as a side effect of building LR. */ - static void -df_lr_finalize (bitmap all_blocks) +df_lr_finalize (bitmap) { df_lr->solutions_dirty = false; - if (df->changeable_flags & DF_LR_RUN_DCE) -{ - run_fast_df_dce (); - - /* If dce deletes some instructions, we need to recompute the lr -solution before proceeding further. The problem is that fast -dce is a pessimestic dataflow algorithm. In the case where -it deletes a statement S inside of a loop, the uses inside of -S may not be deleted from the dataflow solution because they -were carried around the loop. While it is conservatively -correct to leave these extra bits, the standards of df -require that we maintain the best possible (least fixed -point) solution. The only way to do that is to redo the -iteration from the beginning. See PR35805 for an -example. 
*/ - if (df_lr->solutions_dirty) - { - df_clear_flags (DF_LR_RUN_DCE); - df_lr_alloc (all_blocks); - df_lr_local_compute (all_blocks); - df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks); - df_lr_finalize (all_blocks); - df_set_flags (DF_LR_RUN_DCE); - } -} } @@ -1266,6 +1239,69 @@ static const struct df_problem problem_LR = false /* Reset blocks on dropping out of blocks_to_analyze. */ }; +/* Run the fast DCE after building LR. This is a separate problem so that + the "dirty" flag is only cleared after a DCE pass is actually run. */ + +static void +df_lr_dce_finalize (bitmap all_blocks) +{ + if (!(df->changeable_flags & DF_LR_RUN_DCE)) +return; + + /* Also clears df_lr_dce->solutions_dirty. */ + run_fast_df_dce (); + + /* If dce deletes some instructions, we need to recompute the lr + solution before proceeding further. The problem is that fast +
[gcc r15-1808] ivopts: fix wide_int_constant_multiple_p when VAL and DIV are 0. [PR114932]
https://gcc.gnu.org/g:25127123100f04c2d5d70c6933a5f5aedcd69c40 commit r15-1808-g25127123100f04c2d5d70c6933a5f5aedcd69c40 Author: Tamar Christina Date: Wed Jul 3 09:30:28 2024 +0100 ivopts: fix wide_int_constant_multiple_p when VAL and DIV are 0. [PR114932] wide_int_constant_multiple_p tries to check if for two tree expressions a and b that there is a multiplier which makes a == b * c. This code however seems to think that there's no c where a=0 and b=0 are equal which is of course wrong. This fixes it and also fixes the comment. gcc/ChangeLog: PR tree-optimization/114932 * tree-affine.cc (wide_int_constant_multiple_p): Support 0 and 0 being multiples. Diff: --- gcc/tree-affine.cc | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gcc/tree-affine.cc b/gcc/tree-affine.cc index d6309c43903..76117aa4fd6 100644 --- a/gcc/tree-affine.cc +++ b/gcc/tree-affine.cc @@ -880,11 +880,11 @@ free_affine_expand_cache (hash_map **cache) *cache = NULL; } -/* If VAL != CST * DIV for any constant CST, returns false. - Otherwise, if *MULT_SET is true, additionally compares CST and MULT, - and if they are different, returns false. Finally, if neither of these - two cases occur, true is returned, and CST is stored to MULT and MULT_SET - is set to true. */ +/* If VAL == CST * DIV for any constant CST, returns true. + and if *MULT_SET is true, additionally compares CST and MULT + and if they are different, returns false. If true is returned, CST is + stored to MULT and MULT_SET is set to true unless VAL and DIV are both zero + in which case neither MULT nor MULT_SET are updated. */ static bool wide_int_constant_multiple_p (const poly_widest_int &val, @@ -895,6 +895,9 @@ wide_int_constant_multiple_p (const poly_widest_int &val, if (known_eq (val, 0)) { + if (known_eq (div, 0)) + return true; + if (*mult_set && maybe_ne (*mult, 0)) return false; *mult_set = true;
[gcc r15-1809] ivopts: replace constant_multiple_of with aff_combination_constant_multiple_p [PR114932]
https://gcc.gnu.org/g:735edbf1e2479fa2323a2b4a9714fae1a0925f74 commit r15-1809-g735edbf1e2479fa2323a2b4a9714fae1a0925f74 Author: Tamar Christina Date: Wed Jul 3 09:31:09 2024 +0100 ivopts: replace constant_multiple_of with aff_combination_constant_multiple_p [PR114932] The current implementation of constant_multiple_of is doing a more limited version of aff_combination_constant_multiple_p. The only non-debug usage of constant_multiple_of will proceed with the values as affine trees. There is scope for further optimization here, namely I believe that if constant_multiple_of returns the aff_tree after the conversion then get_computation_aff_1 can use it instead of manually creating the aff_tree. However I think it makes sense to first commit this smaller change and then incrementally change things. gcc/ChangeLog: PR tree-optimization/114932 * tree-ssa-loop-ivopts.cc (constant_multiple_of): Use aff_combination_constant_multiple_p instead. Diff: --- gcc/tree-ssa-loop-ivopts.cc | 66 ++--- 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 7cae5bdefea..c3218a3e8ee 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -2146,65 +2146,15 @@ idx_record_use (tree base, tree *idx, static bool constant_multiple_of (tree top, tree bot, widest_int *mul) { - tree mby; - enum tree_code code; - unsigned precision = TYPE_PRECISION (TREE_TYPE (top)); - widest_int res, p0, p1; - - STRIP_NOPS (top); - STRIP_NOPS (bot); - - if (operand_equal_p (top, bot, 0)) -{ - *mul = 1; - return true; -} - - code = TREE_CODE (top); - switch (code) -{ -case MULT_EXPR: - mby = TREE_OPERAND (top, 1); - if (TREE_CODE (mby) != INTEGER_CST) - return false; - - if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res)) - return false; - - *mul = wi::sext (res * wi::to_widest (mby), precision); - return true; - -case PLUS_EXPR: -case MINUS_EXPR: - if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0) - 
|| !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1)) - return false; - - if (code == MINUS_EXPR) - p1 = -p1; - *mul = wi::sext (p0 + p1, precision); - return true; - -case INTEGER_CST: - if (TREE_CODE (bot) != INTEGER_CST) - return false; - - p0 = widest_int::from (wi::to_wide (top), SIGNED); - p1 = widest_int::from (wi::to_wide (bot), SIGNED); - if (p1 == 0) - return false; - *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision); - return res == 0; - -default: - if (POLY_INT_CST_P (top) - && POLY_INT_CST_P (bot) - && constant_multiple_p (wi::to_poly_widest (top), - wi::to_poly_widest (bot), mul)) - return true; + aff_tree aff_top, aff_bot; + tree_to_aff_combination (top, TREE_TYPE (top), &aff_top); + tree_to_aff_combination (bot, TREE_TYPE (bot), &aff_bot); + poly_widest_int poly_mul; + if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul) + && poly_mul.is_constant (mul)) +return true; - return false; -} + return false; } /* Return true if memory reference REF with step STEP may be unaligned. */
[gcc r15-1810] AVR: target/98762 - Handle partial clobber in movqi output.
https://gcc.gnu.org/g:e9fb6efa1cf542353fd44ddcbb5136344c463fd0 commit r15-1810-ge9fb6efa1cf542353fd44ddcbb5136344c463fd0 Author: Georg-Johann Lay Date: Wed Jul 3 10:29:18 2024 +0200 AVR: target/98762 - Handle partial clobber in movqi output. PR target/98762 gcc/ * config/avr/avr.cc (avr_out_movqi_r_mr_reg_disp_tiny): Properly restore the base register when it is partially clobbered. gcc/testsuite/ * gcc.target/avr/torture/pr98762.c: New test. Diff: --- gcc/config/avr/avr.cc | 27 +- gcc/testsuite/gcc.target/avr/torture/pr98762.c | 19 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index a110af62cd5..f048bf5fd41 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -4838,13 +4838,30 @@ avr_out_movqi_r_mr_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen) rtx dest = op[0]; rtx src = op[1]; rtx x = XEXP (src, 0); + rtx base = XEXP (x, 0); - avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB - "ld %0,%b1" , op, plen, -3); + if (plen) +*plen = 0; - if (!reg_overlap_mentioned_p (dest, XEXP (x, 0)) - && !reg_unused_after (insn, XEXP (x, 0))) -avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); + if (!reg_overlap_mentioned_p (dest, base)) +{ + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base)) + avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); +} + else +{ + // PR98762: The base register overlaps dest and is only partly clobbered. 
+ rtx base2 = all_regs_rtx[1 ^ REGNO (dest)]; + + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov __tmp_reg__,%0" , &base2, plen, 1); + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov %0,__tmp_reg__" , &base2, plen, 1); +} return ""; } diff --git a/gcc/testsuite/gcc.target/avr/torture/pr98762.c b/gcc/testsuite/gcc.target/avr/torture/pr98762.c new file mode 100644 index 000..c3ba7da69a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/pr98762.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=c99" } */ + +long long acc = 0x1122334455667788; + +__attribute__((noinline,noclone)) +void addhi (short a) +{ + acc += (long long) a << 32; +} + +int main (void) +{ + addhi (0x0304); + if (acc != 0x1122364855667788) +__builtin_abort(); + + return 0; +}
[gcc r14-10373] AVR: target/98762 - Handle partial clobber in movqi output.
https://gcc.gnu.org/g:55744507abc5240fe1a59a6251f815a0d6217fe8 commit r14-10373-g55744507abc5240fe1a59a6251f815a0d6217fe8 Author: Georg-Johann Lay Date: Wed Jul 3 10:29:18 2024 +0200 AVR: target/98762 - Handle partial clobber in movqi output. PR target/98762 gcc/ * config/avr/avr.cc (avr_out_movqi_r_mr_reg_disp_tiny): Properly restore the base register when it is partially clobbered. gcc/testsuite/ * gcc.target/avr/torture/pr98762.c: New test. (cherry picked from commit e9fb6efa1cf542353fd44ddcbb5136344c463fd0) Diff: --- gcc/config/avr/avr.cc | 27 +- gcc/testsuite/gcc.target/avr/torture/pr98762.c | 19 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index e516c19322c..b41592ff1a3 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -4838,13 +4838,30 @@ avr_out_movqi_r_mr_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen) rtx dest = op[0]; rtx src = op[1]; rtx x = XEXP (src, 0); + rtx base = XEXP (x, 0); - avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB - "ld %0,%b1" , op, plen, -3); + if (plen) +*plen = 0; - if (!reg_overlap_mentioned_p (dest, XEXP (x, 0)) - && !reg_unused_after (insn, XEXP (x, 0))) -avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); + if (!reg_overlap_mentioned_p (dest, base)) +{ + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base)) + avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); +} + else +{ + // PR98762: The base register overlaps dest and is only partly clobbered. 
+ rtx base2 = all_regs_rtx[1 ^ REGNO (dest)]; + + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov __tmp_reg__,%0" , &base2, plen, 1); + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov %0,__tmp_reg__" , &base2, plen, 1); +} return ""; } diff --git a/gcc/testsuite/gcc.target/avr/torture/pr98762.c b/gcc/testsuite/gcc.target/avr/torture/pr98762.c new file mode 100644 index 000..c3ba7da69a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/pr98762.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=c99" } */ + +long long acc = 0x1122334455667788; + +__attribute__((noinline,noclone)) +void addhi (short a) +{ + acc += (long long) a << 32; +} + +int main (void) +{ + addhi (0x0304); + if (acc != 0x1122364855667788) +__builtin_abort(); + + return 0; +}
[gcc r13-8887] AVR: target/98762 - Handle partial clobber in movqi output.
https://gcc.gnu.org/g:ba9fef4bdea32ca5c121a1baba02450faf2b commit r13-8887-gba9fef4bdea32ca5c121a1baba02450faf2b Author: Georg-Johann Lay Date: Wed Jul 3 10:29:18 2024 +0200 AVR: target/98762 - Handle partial clobber in movqi output. PR target/98762 gcc/ * config/avr/avr.cc (avr_out_movqi_r_mr_reg_disp_tiny): Properly restore the base register when it is partially clobbered. gcc/testsuite/ * gcc.target/avr/torture/pr98762.c: New test. (cherry picked from commit e9fb6efa1cf542353fd44ddcbb5136344c463fd0) Diff: --- gcc/config/avr/avr.cc | 27 +- gcc/testsuite/gcc.target/avr/torture/pr98762.c | 19 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 36ad0f23162..676ab2405db 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -4245,13 +4245,30 @@ avr_out_movqi_r_mr_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen) rtx dest = op[0]; rtx src = op[1]; rtx x = XEXP (src, 0); + rtx base = XEXP (x, 0); - avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB - "ld %0,%b1" , op, plen, -3); + if (plen) +*plen = 0; - if (!reg_overlap_mentioned_p (dest, XEXP (x, 0)) - && !reg_unused_after (insn, XEXP (x, 0))) -avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); + if (!reg_overlap_mentioned_p (dest, base)) +{ + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base)) + avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); +} + else +{ + // PR98762: The base register overlaps dest and is only partly clobbered. 
+ rtx base2 = all_regs_rtx[1 ^ REGNO (dest)]; + + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov __tmp_reg__,%0" , &base2, plen, 1); + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov %0,__tmp_reg__" , &base2, plen, 1); +} return ""; } diff --git a/gcc/testsuite/gcc.target/avr/torture/pr98762.c b/gcc/testsuite/gcc.target/avr/torture/pr98762.c new file mode 100644 index 000..c3ba7da69a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/pr98762.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=c99" } */ + +long long acc = 0x1122334455667788; + +__attribute__((noinline,noclone)) +void addhi (short a) +{ + acc += (long long) a << 32; +} + +int main (void) +{ + addhi (0x0304); + if (acc != 0x1122364855667788) +__builtin_abort(); + + return 0; +}
[gcc r15-1811] Handle NULL stmt in SLP_TREE_SCALAR_STMTS
https://gcc.gnu.org/g:03a810da10d8dfb5aec9261372cad7bf090e6986 commit r15-1811-g03a810da10d8dfb5aec9261372cad7bf090e6986 Author: Richard Biener Date: Fri Jun 28 16:04:13 2024 +0200 Handle NULL stmt in SLP_TREE_SCALAR_STMTS The following starts to handle NULL elements in SLP_TREE_SCALAR_STMTS with the first candidate being the two-operator nodes where some lanes are do-not-care and also do not have a scalar stmt computing the result. I originally added SLP_TREE_SCALAR_STMTS to two-operator nodes but this exposes PR115764, so I've split that out. I have a patch to use NULL elements for loads from groups with gaps where we get around not doing that by having a load permutation. * tree-vect-slp.cc (bst_traits::hash): Handle NULL elements in SLP_TREE_SCALAR_STMTS. (vect_print_slp_tree): Likewise. (vect_mark_slp_stmts): Likewise. (vect_mark_slp_stmts_relevant): Likewise. (vect_find_last_scalar_stmt_in_slp): Likewise. (vect_bb_slp_mark_live_stmts): Likewise. (vect_slp_prune_covered_roots): Likewise. (vect_bb_partition_graph_r): Likewise. (vect_remove_slp_scalar_calls): Likewise. (vect_slp_gather_vectorized_scalar_stmts): Likewise. (vect_bb_slp_scalar_cost): Likewise. (vect_contains_pattern_stmt_p): Likewise. (vect_slp_convert_to_external): Likewise. (vect_find_first_scalar_stmt_in_slp): Likewise. (vect_optimize_slp_pass::remove_redundant_permutations): Likewise. (vect_slp_analyze_node_operations_1): Likewise. (vect_schedule_slp_node): Likewise. * tree-vect-stmts.cc (can_vectorize_live_stmts): Likewise. (vectorizable_shift): Likewise. * tree-vect-data-refs.cc (vect_slp_analyze_load_dependences): Handle NULL elements in SLP_TREE_SCALAR_STMTS. 
Diff: --- gcc/tree-vect-data-refs.cc | 2 ++ gcc/tree-vect-slp.cc | 76 -- gcc/tree-vect-stmts.cc | 22 -- 3 files changed, 61 insertions(+), 39 deletions(-) diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 959e127c385..39fd887a96b 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -1041,6 +1041,8 @@ vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node, for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) { + if (! SLP_TREE_SCALAR_STMTS (node)[k]) + continue; stmt_vec_info access_info = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]); if (access_info == first_access_info) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 48e0f9d2705..22ed59a817d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -355,7 +355,7 @@ vect_contains_pattern_stmt_p (vec stmts) stmt_vec_info stmt_info; unsigned int i; FOR_EACH_VEC_ELT (stmts, i, stmt_info) -if (is_pattern_stmt_p (stmt_info)) +if (stmt_info && is_pattern_stmt_p (stmt_info)) return true; return false; } @@ -1591,7 +1591,7 @@ bst_traits::hash (value_type x) { inchash::hash h; for (unsigned i = 0; i < x.length (); ++i) -h.add_int (gimple_uid (x[i]->stmt)); +h.add_int (x[i] ? gimple_uid (x[i]->stmt) : -1); return h.end (); } inline bool @@ -2800,9 +2800,12 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, } if (SLP_TREE_SCALAR_STMTS (node).exists ()) FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) - dump_printf_loc (metadata, user_loc, "\t%sstmt %u %G", - STMT_VINFO_LIVE_P (stmt_info) ? "[l] " : "", - i, stmt_info->stmt); + if (stmt_info) + dump_printf_loc (metadata, user_loc, "\t%sstmt %u %G", +STMT_VINFO_LIVE_P (stmt_info) ? 
"[l] " : "", +i, stmt_info->stmt); + else + dump_printf_loc (metadata, user_loc, "\tstmt %u ---\n", i); else { dump_printf_loc (metadata, user_loc, "\t{ "); @@ -2943,7 +2946,8 @@ vect_mark_slp_stmts (slp_tree node, hash_set &visited) return; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) -STMT_SLP_TYPE (stmt_info) = pure_slp; +if (stmt_info) + STMT_SLP_TYPE (stmt_info) = pure_slp; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (child) @@ -2973,11 +2977,12 @@ vect_mark_slp_stmts_relevant (slp_tree node, hash_set &visited) return; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) -{ - gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) - || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); - STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; -} +if (stmt_info) + { + gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) + || STMT_VI
[gcc r12-10596] AVR: target/98762 - Handle partial clobber in movqi output.
https://gcc.gnu.org/g:5f699cb08eed44a903393f601009e9c6d0b59c59 commit r12-10596-g5f699cb08eed44a903393f601009e9c6d0b59c59 Author: Georg-Johann Lay Date: Wed Jul 3 10:29:18 2024 +0200 AVR: target/98762 - Handle partial clobber in movqi output. PR target/98762 gcc/ * config/avr/avr.cc (avr_out_movqi_r_mr_reg_disp_tiny): Properly restore the base register when it is partially clobbered. gcc/testsuite/ * gcc.target/avr/torture/pr98762.c: New test. (cherry picked from commit e9fb6efa1cf542353fd44ddcbb5136344c463fd0) Diff: --- gcc/config/avr/avr.cc | 27 +- gcc/testsuite/gcc.target/avr/torture/pr98762.c | 19 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index ee033d3204d..f355146f992 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -4223,13 +4223,30 @@ avr_out_movqi_r_mr_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen) rtx dest = op[0]; rtx src = op[1]; rtx x = XEXP (src, 0); + rtx base = XEXP (x, 0); - avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB - "ld %0,%b1" , op, plen, -3); + if (plen) +*plen = 0; - if (!reg_overlap_mentioned_p (dest, XEXP (x, 0)) - && !reg_unused_after (insn, XEXP (x, 0))) -avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); + if (!reg_overlap_mentioned_p (dest, base)) +{ + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base)) + avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2); +} + else +{ + // PR98762: The base register overlaps dest and is only partly clobbered. 
+ rtx base2 = all_regs_rtx[1 ^ REGNO (dest)]; + + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov __tmp_reg__,%0" , &base2, plen, 1); + avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB + "ld %0,%b1", op, plen, 3); + if (!reg_unused_after (insn, base2)) + avr_asm_len ("mov %0,__tmp_reg__" , &base2, plen, 1); +} return ""; } diff --git a/gcc/testsuite/gcc.target/avr/torture/pr98762.c b/gcc/testsuite/gcc.target/avr/torture/pr98762.c new file mode 100644 index 000..c3ba7da69a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/pr98762.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-additional-options "-std=c99" } */ + +long long acc = 0x1122334455667788; + +__attribute__((noinline,noclone)) +void addhi (short a) +{ + acc += (long long) a << 32; +} + +int main (void) +{ + addhi (0x0304); + if (acc != 0x1122364855667788) +__builtin_abort(); + + return 0; +}
[gcc r15-1813] aarch64: PR target/115475 Implement missing __ARM_FEATURE_SVE_BF16 macro
https://gcc.gnu.org/g:6492c7130d6ae9992298fc3d072e2589d1131376 commit r15-1813-g6492c7130d6ae9992298fc3d072e2589d1131376 Author: Kyrylo Tkachov Date: Fri Jun 28 13:22:37 2024 +0530 aarch64: PR target/115475 Implement missing __ARM_FEATURE_SVE_BF16 macro The ACLE requires __ARM_FEATURE_SVE_BF16 to be enabled when SVE and BF16 and the associated intrinsics are available. GCC does support the required intrinsics for TARGET_SVE_BF16 so define this macro too. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/115475 * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define __ARM_FEATURE_SVE_BF16 for TARGET_SVE_BF16. gcc/testsuite/ PR target/115475 * gcc.target/aarch64/acle/bf16_sve_feature.c: New test. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-c.cc | 3 +++ gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c | 10 ++ 2 files changed, 13 insertions(+) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index f5d70339e4e..2aff097dd33 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -254,6 +254,9 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); aarch64_def_or_undef (TARGET_BF16_FP, "__ARM_FEATURE_BF16", pfile); + aarch64_def_or_undef (TARGET_SVE_BF16, + "__ARM_FEATURE_SVE_BF16", pfile); + aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); diff --git a/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c b/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c new file mode 100644 index 000..cb3ddac71a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/bf16_sve_feature.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ + +#pragma GCC target "+sve+bf16" +#ifndef __ARM_FEATURE_SVE_BF16 +#error "__ARM_FEATURE_SVE_BF16 is not defined but should be!" +#endif + +void +foo (void) {} +
[gcc r15-1812] aarch64: PR target/115457 Implement missing __ARM_FEATURE_BF16 macro
https://gcc.gnu.org/g:c10942134fa759843ac1ed1424b86fcb8e6368ba commit r15-1812-gc10942134fa759843ac1ed1424b86fcb8e6368ba Author: Kyrylo Tkachov Date: Thu Jun 27 16:10:41 2024 +0530 aarch64: PR target/115457 Implement missing __ARM_FEATURE_BF16 macro The ACLE asks the user to test for __ARM_FEATURE_BF16 before using the header but GCC doesn't set this up. LLVM does, so this is an inconsistency between the compilers. This patch enables that macro for TARGET_BF16_FP. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/115457 * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define __ARM_FEATURE_BF16 for TARGET_BF16_FP. gcc/testsuite/ PR target/115457 * gcc.target/aarch64/acle/bf16_feature.c: New test. Signed-off-by: Kyrylo Tkachov Diff: --- gcc/config/aarch64/aarch64-c.cc | 2 ++ gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c | 10 ++ 2 files changed, 12 insertions(+) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index d042e5fbd8c..f5d70339e4e 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -252,6 +252,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile); aarch64_def_or_undef (TARGET_BF16_FP, "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); + aarch64_def_or_undef (TARGET_BF16_FP, + "__ARM_FEATURE_BF16", pfile); aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); diff --git a/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c b/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c new file mode 100644 index 000..96584b4b988 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/bf16_feature.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ + +#pragma GCC target "+bf16" +#ifndef __ARM_FEATURE_BF16 +#error "__ARM_FEATURE_BF16 is not defined but should be!" +#endif + +void +foo (void) {} +
[gcc r15-1814] [i386] restore recompute to override opts after change [PR113719]
https://gcc.gnu.org/g:bf2fc0a27b35de039c3d45e6d7ea9ad0a8a305ba commit r15-1814-gbf2fc0a27b35de039c3d45e6d7ea9ad0a8a305ba Author: Alexandre Oliva Date: Thu Jun 13 00:12:47 2024 -0300 [i386] restore recompute to override opts after change [PR113719] The first patch for PR113719 regressed gcc.dg/ipa/iinline-attr.c on toolchains configured to --enable-frame-pointer, because the optimization node created within handle_optimize_attribute had flag_omit_frame_pointer incorrectly set, whereas default_optimization_node didn't. With this difference, can_inline_edge_by_limits_p flagged an optimization mismatch and we refused to inline the function that had a redundant optimization flag into one that didn't, which is exactly what is tested for there. This patch restores the calls to ix86_default_align and ix86_recompute_optlev_based_flags that used to be, and ought to be, issued during TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE, but preserves the intent of the original change, of having those functions called at different spots within ix86_option_override_internal. To that end, the remaining bits were refactored into a separate function, that was in turn adjusted to operate on explicitly-passed opts and opts_set, rather than going for their global counterparts. for gcc/ChangeLog PR target/113719 * config/i386/i386-options.cc (ix86_override_options_after_change_1): Add opts and opts_set parms, operate on them, after factoring out of... (ix86_override_options_after_change): ... this. Restore calls of ix86_default_align and ix86_recompute_optlev_based_flags. (ix86_option_override_internal): Call the factored-out bits. 
Diff: --- gcc/config/i386/i386-options.cc | 59 - 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 1ef2c71a7a2..5824c0cb072 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1913,37 +1913,58 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts, } } -/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ +/* Implement part of TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ -void -ix86_override_options_after_change (void) +static void +ix86_override_options_after_change_1 (struct gcc_options *opts, + struct gcc_options *opts_set) { +#define OPTS_SET_P(OPTION) opts_set->x_ ## OPTION +#define OPTS(OPTION) opts->x_ ## OPTION + /* Disable unrolling small loops when there's explicit -f{,no}unroll-loop. */ - if ((OPTION_SET_P (flag_unroll_loops)) - || (OPTION_SET_P (flag_unroll_all_loops) -&& flag_unroll_all_loops)) + if ((OPTS_SET_P (flag_unroll_loops)) + || (OPTS_SET_P (flag_unroll_all_loops) +&& OPTS (flag_unroll_all_loops))) { - if (!OPTION_SET_P (ix86_unroll_only_small_loops)) - ix86_unroll_only_small_loops = 0; + if (!OPTS_SET_P (ix86_unroll_only_small_loops)) + OPTS (ix86_unroll_only_small_loops) = 0; /* Re-enable -frename-registers and -fweb if funroll-loops enabled. */ - if (!OPTION_SET_P (flag_web)) - flag_web = flag_unroll_loops; - if (!OPTION_SET_P (flag_rename_registers)) - flag_rename_registers = flag_unroll_loops; + if (!OPTS_SET_P (flag_web)) + OPTS (flag_web) = OPTS (flag_unroll_loops); + if (!OPTS_SET_P (flag_rename_registers)) + OPTS (flag_rename_registers) = OPTS (flag_unroll_loops); /* -fcunroll-grow-size default follws -f[no]-unroll-loops. 
*/ - if (!OPTION_SET_P (flag_cunroll_grow_size)) - flag_cunroll_grow_size = flag_unroll_loops -|| flag_peel_loops -|| optimize >= 3; + if (!OPTS_SET_P (flag_cunroll_grow_size)) + OPTS (flag_cunroll_grow_size) + = (OPTS (flag_unroll_loops) +|| OPTS (flag_peel_loops) +|| OPTS (optimize) >= 3); } else { - if (!OPTION_SET_P (flag_cunroll_grow_size)) - flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; + if (!OPTS_SET_P (flag_cunroll_grow_size)) + OPTS (flag_cunroll_grow_size) + = (OPTS (flag_peel_loops) +|| OPTS (optimize) >= 3); } +#undef OPTS +#undef OPTS_SET_P +} + +/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ + +void +ix86_override_options_after_change (void) +{ + ix86_default_align (&global_options); + + ix86_recompute_optlev_based_flags (&global_options, &global_options_set); + + ix86_override_options_after_change_1 (&global_options, &global_options_set); } /* Clear stack slot assignments remembered from previous functions. @@ -2493,7 +2514,7 @@ ix86_option_override_internal (bool main_args_p, ix86_recompute_optl
[gcc r15-1815] [debug] Avoid dropping bits from num/den in fixed-point types
https://gcc.gnu.org/g:113c4826f5e1ff88a9e1625980ff0b617583279c commit r15-1815-g113c4826f5e1ff88a9e1625980ff0b617583279c Author: Alexandre Oliva Date: Wed Jul 3 07:33:00 2024 -0300 [debug] Avoid dropping bits from num/den in fixed-point types We used to use an unsigned 128-bit type to hold the numerator and denominator used to represent the delta of a fixed-point type in debug information, but there are cases in which that was not enough, and more significant bits silently overflowed and got omitted from debug information. Introduce a mode in which UI_to_gnu selects a wide-enough unsigned type, and use that to convert numerator and denominator. While at that, avoid exceeding the maximum precision for wide ints, and for available int modes, when selecting a type to represent very wide constants, falling back to 0/0 for unrepresentable fractions. for gcc/ada/ChangeLog * gcc-interface/cuintp.cc (UI_To_gnu): Add mode that selects a wide enough unsigned type. Fail if the constant exceeds the representable numbers. * gcc-interface/decl.cc (gnat_to_gnu_entity): Use it for numerator and denominator of fixed-point types. In case of failure, fall back to an indeterminate fraction. Diff: --- gcc/ada/gcc-interface/cuintp.cc | 66 ++--- gcc/ada/gcc-interface/decl.cc | 19 +--- 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/gcc/ada/gcc-interface/cuintp.cc b/gcc/ada/gcc-interface/cuintp.cc index cdf6c019750..1903c5af0f1 100644 --- a/gcc/ada/gcc-interface/cuintp.cc +++ b/gcc/ada/gcc-interface/cuintp.cc @@ -35,6 +35,7 @@ #include "tree.h" #include "inchash.h" #include "fold-const.h" +#include "stor-layout.h" #include "ada.h" #include "types.h" @@ -67,7 +68,9 @@ build_cst_from_int (tree type, HOST_WIDE_INT low) /* Similar to UI_To_Int, but return a GCC INTEGER_CST or REAL_CST node, depending on whether TYPE is an integral or real type. Overflow is tested by the constant-folding used to build the node. TYPE is the GCC type of - the resulting node. 
*/ + the resulting node. If TYPE is NULL, an unsigned integer type wide enough + to hold the entire constant is selected, and if no such type exists, + return NULL_TREE. */ tree UI_To_gnu (Uint Input, tree type) @@ -77,8 +80,10 @@ UI_To_gnu (Uint Input, tree type) any such possible value for intermediate computations and then rely on a conversion back to TYPE to perform the bias adjustment when need be. */ tree comp_type -= TREE_CODE (type) == INTEGER_TYPE && TYPE_BIASED_REPRESENTATION_P (type) - ? get_base_type (type) : type; += (!type ? gnat_type_for_size (32, 1) + : (TREE_CODE (type) == INTEGER_TYPE + && TYPE_BIASED_REPRESENTATION_P (type)) + ? get_base_type (type) : type); tree gnu_ret; if (Input <= Uint_Direct_Last) @@ -88,9 +93,14 @@ UI_To_gnu (Uint Input, tree type) Int Idx = (*Uints_Ptr)[Input - Uint_Table_Start].Loc; Pos Length = (*Uints_Ptr)[Input - Uint_Table_Start].Length; Int First = (*Udigits_Ptr)[Idx]; + tree_code code = First < 0 ? MINUS_EXPR : PLUS_EXPR; tree gnu_base; gcc_assert (Length > 0); + /* The extension of unsigned types we use to try to fit the +constant only works if we're dealing with nonnegative +constants, but that's what we expect when !TYPE. */ + gcc_assert (type || First >= 0); /* The computations we perform below always require a type at least as large as an integer not to overflow. 
FP types are always fine, but @@ -103,22 +113,44 @@ UI_To_gnu (Uint Input, tree type) gnu_base = build_cst_from_int (comp_type, Base); gnu_ret = build_cst_from_int (comp_type, First); - if (First < 0) - for (Idx++, Length--; Length; Idx++, Length--) - gnu_ret = fold_build2 (MINUS_EXPR, comp_type, -fold_build2 (MULT_EXPR, comp_type, - gnu_ret, gnu_base), -build_cst_from_int (comp_type, -(*Udigits_Ptr)[Idx])); - else - for (Idx++, Length--; Length; Idx++, Length--) - gnu_ret = fold_build2 (PLUS_EXPR, comp_type, -fold_build2 (MULT_EXPR, comp_type, - gnu_ret, gnu_base), -build_cst_from_int (comp_type, -(*Udigits_Ptr)[Idx])); + for (Idx++, Length--; Length; Idx++, Length--) + for (;;) + { + tree elt, scaled, next_ret; + elt = build_cst_from_int (comp_type, (*Udigits_Ptr)[Idx]); + /* We want to detect overflows with an unsigned type when + TYPE is not given
[gcc r15-1816] Deduplicate explicitly-sized types
https://gcc.gnu.org/g:640f0f3e2b771e23665924f24527e6b1a5db8d3c commit r15-1816-g640f0f3e2b771e23665924f24527e6b1a5db8d3c Author: Alexandre Oliva Date: Wed Jul 3 07:33:08 2024 -0300 Deduplicate explicitly-sized types When make_type_from_size is called with a biased type, for an entity that isn't explicitly biased, we may refrain from reusing the given type because it doesn't seem to match, and then proceed to create an exact copy of that type. Compute earlier the biased status of the expected type, early enough for the suitability check of the given type. Modify for_biased instead of biased_p, so that biased_p remains with the given type's status for the comparison. Avoid creating unnecessary copies of types in make_type_from_size, by caching and reusing previously-created identical types, similarly to the caching of packable types. While at that, fix two vaguely related issues: - TYPE_DEBUG_TYPE's storage is shared with other sorts of references to types, so it shouldn't be accessed unless TYPE_CAN_HAVE_DEBUG_TYPE_P holds. - When we choose the narrower/packed variant of a type as the main debug info type, we fail to output its name if we fail to follow debug type for the TYPE_NAME decl type in modified_type_die. for gcc/ada/ChangeLog * gcc-interface/misc.cc (gnat_get_array_descr_info): Only follow TYPE_DEBUG_TYPE if TYPE_CAN_HAVE_DEBUG_TYPE_P. * gcc-interface/utils.cc (sized_type_hash): New struct. (sized_type_hasher): New struct. (sized_type_hash_table): New variable. (init_gnat_utils): Allocate it. (destroy_gnat_utils): Release it. (sized_type_hasher::equal): New. (hash_sized_type): New. (canonicalize_sized_type): New. (make_type_from_size): Use it to cache packed variants. Fix type reuse by combining biased_p and for_biased earlier. Hold the combination in for_biased, adjusting later uses. for gcc/ChangeLog * dwarf2out.cc (modified_type_die): Follow name's debug type. for gcc/testsuite/ChangeLog * gnat.dg/bias1.adb: Count occurrences of -7.*DW_AT_GNU_bias. 
Diff: --- gcc/ada/gcc-interface/misc.cc | 3 +- gcc/ada/gcc-interface/utils.cc | 116 ++-- gcc/dwarf2out.cc| 7 ++- gcc/testsuite/gnat.dg/bias1.adb | 3 +- 4 files changed, 120 insertions(+), 9 deletions(-) diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc index 4f6f6774fe7..f77629ce70b 100644 --- a/gcc/ada/gcc-interface/misc.cc +++ b/gcc/ada/gcc-interface/misc.cc @@ -967,7 +967,8 @@ gnat_get_array_descr_info (const_tree const_type, while (true) { - if (TYPE_DEBUG_TYPE (source_element_type)) + if (TYPE_CAN_HAVE_DEBUG_TYPE_P (source_element_type) + && TYPE_DEBUG_TYPE (source_element_type)) source_element_type = TYPE_DEBUG_TYPE (source_element_type); else if (TYPE_IS_PADDING_P (source_element_type)) source_element_type diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index 0eb9af8d4a2..66e3192ea4f 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -364,6 +364,26 @@ struct pad_type_hasher : ggc_cache_ptr_hash static GTY ((cache)) hash_table *pad_type_hash_table; +struct GTY((for_user)) sized_type_hash +{ + hashval_t hash; + tree type; +}; + +struct sized_type_hasher : ggc_cache_ptr_hash +{ + static inline hashval_t hash (sized_type_hash *t) { return t->hash; } + static bool equal (sized_type_hash *a, sized_type_hash *b); + + static int + keep_cache_entry (sized_type_hash *&t) + { +return ggc_marked_p (t->type); + } +}; + +static GTY ((cache)) hash_table *sized_type_hash_table; + static tree merge_sizes (tree, tree, tree, bool, bool); static tree fold_bit_position (const_tree); static tree compute_related_constant (tree, tree); @@ -421,6 +441,9 @@ init_gnat_utils (void) /* Initialize the hash table of padded types. */ pad_type_hash_table = hash_table::create_ggc (512); + + /* Initialize the hash table of sized types. */ + sized_type_hash_table = hash_table::create_ggc (512); } /* Destroy data structures of the utils.cc module. 
*/ @@ -443,6 +466,10 @@ destroy_gnat_utils (void) /* Destroy the hash table of padded types. */ pad_type_hash_table->empty (); pad_type_hash_table = NULL; + + /* Destroy the hash table of sized types. */ + sized_type_hash_table->empty (); + sized_type_hash_table = NULL; } /* GNAT_ENTITY is a GNAT tree node for an entity. Associate GNU_DECL, a GCC @@ -1350,6 +1377,79 @@ type_unsigned_for_rm (tree type) return false; } +/* Return true iff the sized types are equivalent. */ + +bool +sized_type_hasher::equal (siz
[gcc r14-10374] preprocessor: Create the parser before handling command-line includes [PR115312]
https://gcc.gnu.org/g:3389a23fd492b7920a62de6af298251b3cdab617 commit r14-10374-g3389a23fd492b7920a62de6af298251b3cdab617 Author: Lewis Hyatt Date: Sat Jun 15 21:09:01 2024 -0400 preprocessor: Create the parser before handling command-line includes [PR115312] Since r14-2893, we create a parser object in preprocess-only mode for the purpose of parsing #pragma while preprocessing. The parser object was formerly created after calling c_finish_options(), which leads to problems on platforms that don't use stdc-predef.h (such as MinGW, as reported in the PR). On such platforms, the call to c_finish_options() will process the first command-line-specified include file. If that includes a PCH, then c-ppoutput.cc will encounter a state it did not anticipate. Fix it by creating the parser prior to calling c_finish_options(). gcc/c-family/ChangeLog: PR pch/115312 * c-opts.cc (c_common_init): Call c_init_preprocess() before c_finish_options() so that a parser is available to process any includes specified on the command line. gcc/testsuite/ChangeLog: PR pch/115312 * g++.dg/pch/pr115312.C: New test. * g++.dg/pch/pr115312.Hs: New test. 
Diff: --- gcc/c-family/c-opts.cc | 2 +- gcc/testsuite/g++.dg/pch/pr115312.C | 2 ++ gcc/testsuite/g++.dg/pch/pr115312.Hs | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index be3058dca63..f4dced8d826 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -1285,8 +1285,8 @@ c_common_init (void) if (flag_preprocess_only) { - c_finish_options (); c_init_preprocess (); + c_finish_options (); preprocess_file (parse_in); return false; } diff --git a/gcc/testsuite/g++.dg/pch/pr115312.C b/gcc/testsuite/g++.dg/pch/pr115312.C new file mode 100644 index 000..9074ad4a5ad --- /dev/null +++ b/gcc/testsuite/g++.dg/pch/pr115312.C @@ -0,0 +1,2 @@ +/* { dg-additional-options "-include pr115312.H -save-temps" } */ +#error "suppress PCH assembly comparison, which does not work with -save-temps" /* { dg-error "." } */ diff --git a/gcc/testsuite/g++.dg/pch/pr115312.Hs b/gcc/testsuite/g++.dg/pch/pr115312.Hs new file mode 100644 index 000..6e7c6bcac2f --- /dev/null +++ b/gcc/testsuite/g++.dg/pch/pr115312.Hs @@ -0,0 +1 @@ +// This space intentionally left blank.
[gcc r15-1817] [PATCH] match.pd: Fold x/sqrt(x) to sqrt(x)
https://gcc.gnu.org/g:8dc5ad3ce8d4d2cd6cc2b7516d282395502fdf7d commit r15-1817-g8dc5ad3ce8d4d2cd6cc2b7516d282395502fdf7d Author: Jennifer Schmitz Date: Wed Jul 3 14:40:42 2024 +0200 [PATCH] match.pd: Fold x/sqrt(x) to sqrt(x) This patch adds a pattern in match.pd folding x/sqrt(x) to sqrt(x) for -funsafe-math-optimizations. Test cases were added for double, float, and long double. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. Ok for mainline? Signed-off-by: Jennifer Schmitz gcc/ * match.pd: Fold x/sqrt(x) to sqrt(x). gcc/testsuite/ * gcc.dg/tree-ssa/sqrt_div.c: New test. Diff: --- gcc/match.pd | 4 gcc/testsuite/gcc.dg/tree-ssa/sqrt_div.c | 23 +++ 2 files changed, 27 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 7fff7b5f9fe..a2e205b3207 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7770,6 +7770,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) when the operand has that value.) */ (if (flag_unsafe_math_optimizations) + /* Simplify x / sqrt(x) -> sqrt(x). */ + (simplify + (rdiv @0 (SQRT @0)) (SQRT @0)) + /* Simplify sqrt(x) * sqrt(x) -> x. */ (simplify (mult (SQRT_ALL@1 @0) @1) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sqrt_div.c b/gcc/testsuite/gcc.dg/tree-ssa/sqrt_div.c new file mode 100644 index 000..2ae481b7982 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/sqrt_div.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */ +/* { dg-require-effective-target c99_runtime } */ + +#define T(n, type, fname) \ +type f##n (type x) \ +{ \ + type t1 = __builtin_##fname (x); \ + type t2 = x / t1;\ + return t2; \ +} + +T(1, double, sqrt) + +/* { dg-final { scan-tree-dump "gimple_simplified to t2_\[0-9\]+ = __builtin_sqrt .x_\[0-9\]*.D.." "forwprop1" } } */ + +T(2, float, sqrtf ) + +/* { dg-final { scan-tree-dump "gimple_simplified to t2_\[0-9\]+ = __builtin_sqrtf .x_\[0-9\]*.D.." 
"forwprop1" } } */ + +T(3, long double, sqrtl) + +/* { dg-final { scan-tree-dump "gimple_simplified to t2_\[0-9\]+ = __builtin_sqrtl .x_\[0-9\]*.D.." "forwprop1" } } */
[gcc r15-1818] Remove redundant vector permute dump
https://gcc.gnu.org/g:1dc2096537818bd80191e0d6015412e2906658bc commit r15-1818-g1dc2096537818bd80191e0d6015412e2906658bc Author: Richard Biener Date: Wed Jul 3 13:49:58 2024 +0200 Remove redundant vector permute dump The following removes redundant dumping in vect permute vectorization. * tree-vect-slp.cc (vectorizable_slp_permutation_1): Remove redundant dump. Diff: --- gcc/tree-vect-slp.cc | 10 -- 1 file changed, 10 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 22ed59a817d..a8bb08ea7be 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -9350,16 +9350,6 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, } gcc_assert (perm.length () == SLP_TREE_LANES (node)); - if (dump_p) -{ - dump_printf_loc (MSG_NOTE, vect_location, - "vectorizing permutation"); - for (unsigned i = 0; i < perm.length (); ++i) - dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second); - if (repeating_p) - dump_printf (MSG_NOTE, " (repeat %d)\n", SLP_TREE_LANES (node)); - dump_printf (MSG_NOTE, "\n"); -} /* REPEATING_P is true if every output vector is guaranteed to use the same permute vector. We can handle that case for both variable-length
[gcc r15-1819] Vect: Support IFN SAT_TRUNC for unsigned vector int
https://gcc.gnu.org/g:8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58 commit r15-1819-g8d2c460e79aa013cc4eeb79bb45d18bd3d0aee58 Author: Pan Li Date: Tue Jul 2 21:23:43 2024 +0800 Vect: Support IFN SAT_TRUNC for unsigned vector int This patch would like to support the .SAT_TRUNC for the unsigned vector int. Given we have below example code: Form 1 #define VEC_DEF_SAT_U_TRUC_FMT_1(NT, WT) \ void __attribute__((noinline)) \ vec_sat_u_truc_##WT##_to_##NT##_fmt_1 (NT *x, WT *y, unsigned limit) \ {\ for (unsigned i = 0; i < limit; i++) \ {\ bool overflow = y[i] > (WT)(NT)(-1); \ x[i] = ((NT)y[i]) | (NT)-overflow; \ }\ } VEC_DEF_SAT_U_TRUC_FMT_1 (uint32_t, uint64_t) Before this patch: void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, unsigned int limit) { ... _51 = .SELECT_VL (ivtmp_49, POLY_INT_CST [2, 2]); ivtmp_35 = _51 * 8; vect__4.7_32 = .MASK_LEN_LOAD (vectp_y.5_34, 64B, { -1, ... }, _51, 0); mask_overflow_16.8_30 = vect__4.7_32 > { 4294967295, ... }; vect__5.9_29 = (vector([2,2]) unsigned int) vect__4.7_32; vect__10.13_20 = .VCOND_MASK (mask_overflow_16.8_30, { 4294967295, ... }, vect__5.9_29); ivtmp_12 = _51 * 4; .MASK_LEN_STORE (vectp_x.14_11, 32B, { -1, ... }, _51, 0, vect__10.13_20); vectp_y.5_33 = vectp_y.5_34 + ivtmp_35; vectp_x.14_46 = vectp_x.14_11 + ivtmp_12; ivtmp_50 = ivtmp_49 - _51; if (ivtmp_50 != 0) ... } After this patch: void vec_sat_u_truc_uint64_t_to_uint32_t_fmt_1 (uint32_t * x, uint64_t * y, unsigned int limit) { ... _12 = .SELECT_VL (ivtmp_21, POLY_INT_CST [2, 2]); ivtmp_34 = _12 * 8; vect__4.7_31 = .MASK_LEN_LOAD (vectp_y.5_33, 64B, { -1, ... }, _12, 0); vect_patt_40.8_30 = .SAT_TRUNC (vect__4.7_31); // << .SAT_TRUNC ivtmp_29 = _12 * 4; .MASK_LEN_STORE (vectp_x.9_28, 32B, { -1, ... }, _12, 0, vect_patt_40.8_30); vectp_y.5_32 = vectp_y.5_33 + ivtmp_34; vectp_x.9_27 = vectp_x.9_28 + ivtmp_29; ivtmp_20 = ivtmp_21 - _12; if (ivtmp_20 != 0) ... } The below test suites are passed for this patch * The x86 bootstrap test. 
* The x86 fully regression test. * The rv64gcv fully regression tests. gcc/ChangeLog: * tree-vect-patterns.cc (gimple_unsigned_integer_sat_trunc): Add new decl generated by match. (vect_recog_sat_trunc_pattern): Add new func impl to recog the .SAT_TRUNC pattern. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-patterns.cc | 54 +++ 1 file changed, 54 insertions(+) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 519d15f2a43..86e893a1c43 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4489,6 +4489,7 @@ vect_recog_mult_pattern (vec_info *vinfo, extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree)); +extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); static gimple * vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info, @@ -4603,6 +4604,58 @@ vect_recog_sat_sub_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, return NULL; } +/* + * Try to detect saturation truncation pattern (SAT_TRUNC), aka below gimple: + * overflow_5 = x_4(D) > 4294967295; + * _1 = (unsigned int) x_4(D); + * _2 = (unsigned int) overflow_5; + * _3 = -_2; + * _6 = _1 | _3; + * + * And then simplied to + * _6 = .SAT_TRUNC (x_4(D)); + */ + +static gimple * +vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); + + if (!is_gimple_assign (last_stmt)) +return NULL; + + tree ops[1]; + tree lhs = gimple_assign_lhs (last_stmt); + + if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)) +{ + tree itype = TREE_TYPE (ops[0]); + tree otype = TREE_TYPE (lhs); + tree v_itype = get_vectype_for_scalar_type (vinfo, itype); + tree v_otype = get_vectype_for_scalar_type (vinfo, otype); + internal_fn fn = IFN_SAT_TRUNC; + + if (v_itype != NULL_TREE && v_otype != NULL_TREE + && direct_internal_fn_supported_p (fn, tree_pair (v_otype, 
v_itype), + OPTIMIZE_FOR_BOTH)) + { + gcall *c
[gcc r15-1820] Match: Allow more types truncation for .SAT_TRUNC
https://gcc.gnu.org/g:44c767c06b6882d05fe56f4a3e03195101402fb0 commit r15-1820-g44c767c06b6882d05fe56f4a3e03195101402fb0 Author: Pan Li Date: Tue Jul 2 08:57:50 2024 +0800 Match: Allow more types truncation for .SAT_TRUNC The .SAT_TRUNC has the input and output types, aka cvt from itype to otype and the sizeof (otype) < sizeof (itype). The previous patch only allows the sizeof (otype) == sizeof (itype) / 2. But actually we have 1/4 and 1/8 truncation. This patch would like to support more types truncation when sizeof (otype) < sizeof (itype). The below truncation will be covered. * uint64_t => uint8_t * uint64_t => uint16_t * uint64_t => uint32_t * uint32_t => uint8_t * uint32_t => uint16_t * uint16_t => uint8_t The below test suites are passed for this patch: 1. The rv64gcv full regression tests. 2. The rv64gcv build with glibc. 3. The x86 bootstrap tests. 4. The x86 full regression tests. gcc/ChangeLog: * match.pd: Allow any otype is less than itype truncation. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index a2e205b3207..4edfa2ae2c9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3239,16 +3239,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_trunc @0) (bit_ior:c (negate (convert (gt @0 INTEGER_CST@1))) (convert @0)) - (with { + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0))) + (with + { unsigned itype_precision = TYPE_PRECISION (TREE_TYPE (@0)); unsigned otype_precision = TYPE_PRECISION (type); - wide_int trunc_max = wi::mask (itype_precision / 2, false, itype_precision); + wide_int trunc_max = wi::mask (otype_precision, false, itype_precision); wide_int int_cst = wi::to_wide (@1, itype_precision); } - (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && TYPE_UNSIGNED (TREE_TYPE (@0)) - && otype_precision < itype_precision - && wi::eq_p (trunc_max, int_cst) + (if (otype_precision < 
itype_precision && wi::eq_p (trunc_max, int_cst)) /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */
[gcc r15-1821] [MAINTAINERS] Update my email address.
https://gcc.gnu.org/g:56814070003d2d54885c9009b7594c2b758ff81b commit r15-1821-g56814070003d2d54885c9009b7594c2b758ff81b Author: Prathamesh Kulkarni Date: Wed Jul 3 18:59:48 2024 +0530 [MAINTAINERS] Update my email address. * MAINTAINERS: Update my email address and add myself to DCO. Signed-off-by: Prathamesh Kulkarni Diff: --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ddeea7b497f..b4739f29107 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -511,7 +511,7 @@ Matt Kraai Jan Kratochvil Matthias Kretz Louis Krupp -Prathamesh Kulkarni +Prathamesh Kulkarni Venkataramanan Kumar Doug Kwan Aaron W. LaFramboise @@ -791,3 +791,4 @@ Jonathan Wakely Alexander Westbrooks Chung-Ju Wu Pengxuan Zheng +Prathamesh Kulkarni
[gcc r15-1822] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763]
https://gcc.gnu.org/g:de9254e224eb3d89303cb9b3ba50b4c479c55f7c commit r15-1822-gde9254e224eb3d89303cb9b3ba50b4c479c55f7c Author: Pan Li Date: Wed Jul 3 22:06:48 2024 +0800 RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW [PR115763] According to the ISA, the zvfhmin sub extension should only contain conversion insns. Thus, the vfmv insn acting on FP16 should not be present when only the zvfhmin option is given. This patch would like to fix it by splitting the pred_broadcast define_insn into zvfhmin and zvfh part. Given below example: void test (_Float16 *dest, _Float16 bias) { dest[0] = bias; dest[1] = bias; } when compiled with -march=rv64gcv_zfh_zvfhmin Before this patch: test: vsetivli zero,2,e16,mf4,ta,ma vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin vse16.v v1,0(a0) ret After this patch: test: addi sp,sp,-16 fsh fa0,14(sp) addi a5,sp,14 vsetivli zero,2,e16,mf4,ta,ma vlse16.v v1,0(a5),zero vse16.v v1,0(a0) addi sp,sp,16 jr ra PR target/115763 gcc/ChangeLog: * config/riscv/vector.md (*pred_broadcast): Split into zvfh and zvfhmin part. (*pred_broadcast_zvfh): New define_insn for zvfh part. (*pred_broadcast_zvfhmin): Ditto but for zvfhmin. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check. * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. * gcc.target/riscv/rvv/base/pr115763-1.c: New test. * gcc.target/riscv/rvv/base/pr115763-2.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/vector.md | 49 +++--- .../gcc.target/riscv/rvv/base/pr115763-1.c | 9 .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 + .../gcc.target/riscv/rvv/base/scalar_move-5.c | 4 +- .../gcc.target/riscv/rvv/base/scalar_move-6.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-7.c | 6 +-- .../gcc.target/riscv/rvv/base/scalar_move-8.c | 6 +-- 7 files changed, 64 insertions(+), 26 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index fe18ee5b5f7..d9474262d54 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -2080,31 +2080,50 @@ [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") (set_attr "mode" "")]) -(define_insn "*pred_broadcast" - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr, vr, vr, vr, vr") - (if_then_else:V_VLSF_ZVFHMIN +(define_insn "*pred_broadcast_zvfh" + [(set (match_operand:V_VLSF0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF (unspec: - [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1") -(match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") -(match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") -(match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + [(match_operand: 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1") +(match_operand 4 "vector_length_operand" " rK, rK, rK, rK") +(match_operand 5 "const_int_operand" " i, i, i, i") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand: 3 "direct_broadcast_operand" " f, f,Wdm,Wdm,Wdm,Wdm, f, f")) - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand""vu, 0, vu, 0, vu, 0, vu, 0")))] + (vec_duplicate:V_VLSF + (match_operand: 3 
"direct_broadcast_operand" " f, f, f, f")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "@ vfmv.v.f\t%0,%3 vfmv.v.f\t%0,%3 + vfmv.s.f\t%0,%3 + vfmv.s.f\t%0,%3" + [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv") + (set_attr "mode" "")]) + +(define_insn "*pred_broadcast_zvfhmin" + [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF_ZVFHMIN + (unspec: + [(match_operand:1
[gcc/ibm/heads/gcc-12-branch] (21 commits) ibm: Merge up to top of releases/gcc-12
The branch 'ibm/heads/gcc-12-branch' was updated to point to: 95d8973a6f8... ibm: Merge up to top of releases/gcc-12 It previously pointed to: 3409c8aae4b... ibm: Merge up to top of releases/gcc-12 Diff: Summary of changes (added commits): --- 95d8973... ibm: Merge up to top of releases/gcc-12 5f699cb... AVR: target/98762 - Handle partial clobber in movqi output. (*) ca6eea0... rs6000: Fix wrong RTL patterns for vector merge high/low sh (*) 13f0528... rs6000: Fix wrong RTL patterns for vector merge high/low ch (*) c99573f... Daily bump. (*) 4351caf... Daily bump. (*) 5330558... AVR: target/88236, target/115726 - Fix __memx code generati (*) 1d6c409... Daily bump. (*) 1a837bc... Daily bump. (*) ae5bf1a... Daily bump. (*) 96ef336... rs6000: Fix wrong RTL patterns for vector merge high/low wo (*) aba7335... Daily bump. (*) 95ca5f4... [libstdc++] [testsuite] defer to check_vect_support* [PR115 (*) 25cb136... Add support for -mcpu=grace (*) 4f8dc81... Daily bump. (*) 809d911... Daily bump. (*) 4b69afd... libstdc++: Remove confusing text from status tables for rel (*) 814b8cc... Daily bump. (*) 0fd6ae9... rs6000: Don't clobber return value when eh_return called [P (*) f4affb9... Daily bump. (*) 723716c... Daily bump. (*) (*) This commit already exists in another branch. Because the reference `refs/vendors/ibm/heads/gcc-12-branch' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/vendors/ibm/heads/gcc-12-branch)] ibm: Merge up to top of releases/gcc-12
https://gcc.gnu.org/g:95d8973a6f8fb29dee98b12f2d90f836620410b3 commit 95d8973a6f8fb29dee98b12f2d90f836620410b3 Merge: 3409c8aae4b 5f699cb08ee Author: Peter Bergner Date: Wed Jul 3 09:07:00 2024 -0500 ibm: Merge up to top of releases/gcc-12 2024-07-03 Peter Bergner Merge up to releases/gcc-12 5f699cb08eed44a903393f601009e9c6d0b59c59 Diff: gcc/ChangeLog | 77 +++ gcc/ChangeLog.ibm | 4 + gcc/DATESTAMP | 2 +- gcc/config/aarch64/aarch64-cores.def | 1 + gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/config/avr/avr.cc | 35 +++- gcc/config/avr/avr.md | 18 +- gcc/config/rs6000/altivec.md | 222 +++-- gcc/config/rs6000/rs6000-logue.cc | 7 +- gcc/config/rs6000/rs6000.cc| 24 +-- gcc/config/rs6000/rs6000.md| 15 ++ gcc/config/rs6000/vsx.md | 28 +-- gcc/doc/invoke.texi| 4 +- gcc/testsuite/ChangeLog| 28 +++ gcc/testsuite/g++.target/powerpc/pr106069.C| 119 +++ .../gcc.target/avr/torture/pr88236-pr115726.c | 115 +++ gcc/testsuite/gcc.target/avr/torture/pr98762.c | 19 ++ gcc/testsuite/gcc.target/powerpc/pr106069-1.c | 39 gcc/testsuite/gcc.target/powerpc/pr106069-2.c | 37 gcc/testsuite/gcc.target/powerpc/pr114846.c| 20 ++ gcc/testsuite/gcc.target/powerpc/pr115355.c| 37 libstdc++-v3/ChangeLog | 23 +++ libstdc++-v3/doc/html/manual/status.html | 24 +-- libstdc++-v3/doc/xml/manual/status_cxx1998.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxx2011.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxx2014.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxx2017.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxx2020.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxx2023.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxxtr1.xml | 3 +- libstdc++-v3/doc/xml/manual/status_cxxtr24733.xml | 3 +- .../experimental/simd/pr115454_find_last_set.cc| 1 - 32 files changed, 791 insertions(+), 134 deletions(-) diff --cc gcc/ChangeLog.ibm index a07d18e2326,000..c7932696714 mode 100644,00..100644 --- a/gcc/ChangeLog.ibm +++ b/gcc/ChangeLog.ibm @@@ -1,51 -1,0 +1,55 @@@ ++2024-07-03 Peter Bergner ++ ++ Merge up to 
releases/gcc-12 5f699cb08eed44a903393f601009e9c6d0b59c59 ++ +2024-06-22 Peter Bergner + + Merge up to releases/gcc-12 218adac0fce6135fcb5c0c56911272687f05872b + +2024-05-29 Peter Bergner + + Merge up to releases/gcc-12 342f577d8ea60c3473a6c1e66ef038b96f99f9d2 + +2024-01-26 Peter Bergner + + Merge up to releases/gcc-12 52dde3e8af9fac2e9c11ce5dfdad1f19546bbb64 + +2023-11-21 Peter Bergner + + Merge up to releases/gcc-12 a285310c50a868d27f63fc0297e56dd5d31a924b + +2023-09-14 Peter Bergner + + Merge up to releases/gcc-12 6ca605af5995abf3d4013e7e146754509b8faddb + +2023-07-31 Peter Bergner + + Merge up to releases/gcc-12 79ebcd30bda2cd00bf442a28717ec50ae0a8cd1d + +2023-04-11 Peter Bergner + + Merge up to releases/gcc-12 908d9c7e6ed4be95d39b7b01056dda365f379947 + +2023-03-27 Peter Bergner + + Merge up to releases/gcc-12 4f41c4ff250709219a7c3eba27a62f8a4689412b + +2022-11-04 Peter Bergner + + Merge up to releases/gcc-12 33561e870dc48966e8c7ede46e95032279a15423 + +2022-10-17 Peter Bergner + + Merge up to releases/gcc-12 fe7d74313736b8e1c30812bc49419f419bdf1c53 + +2022-09-16 Peter Bergner + + Merge up to releases/gcc-12 05cfd7b0677502d06a50ea6ff05d4445e194e3b9 + +2022-08-19 Peter Bergner + + Merge up to releases/gcc-12 2d29d7b240d9ca87cbee5d90c846694125d293af + +2022-06-15 Peter Bergner + + Create ibm/gcc-12-branch which follows the releases/gcc-12 branch.
[gcc r15-1823] [PATCH] ARC: Update gcc.target/arc/pr9001184797.c test
https://gcc.gnu.org/g:c41eb4c702ed04993a475d5910c190af1ff66720 commit r15-1823-gc41eb4c702ed04993a475d5910c190af1ff66720 Author: Luis Silva Date: Wed Jul 3 09:41:05 2024 -0600 [PATCH] ARC: Update gcc.target/arc/pr9001184797.c test ... to comply with new standards due to stricter analysis in the latest GCC versions. gcc/testsuite/ChangeLog: * gcc.target/arc/pr9001184797.c: Fix compiler warnings. Diff: --- gcc/testsuite/gcc.target/arc/pr9001184797.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/arc/pr9001184797.c b/gcc/testsuite/gcc.target/arc/pr9001184797.c index e76c6769042..6c5de5fe729 100644 --- a/gcc/testsuite/gcc.target/arc/pr9001184797.c +++ b/gcc/testsuite/gcc.target/arc/pr9001184797.c @@ -4,13 +4,15 @@ /* This test studies the use of anchors and tls symbols. */ +extern int h(); + struct a b; struct a { long c; long d } e() { static __thread struct a f; - static __thread g; + static __thread int g; g = 5; h(); if (f.c)
[gcc r15-1824] RISC-V: Add support for Zabha extension
https://gcc.gnu.org/g:7b2b2e3d660edc8ef3a8cfbdfc2b0fd499459601 commit r15-1824-g7b2b2e3d660edc8ef3a8cfbdfc2b0fd499459601 Author: Gianluca Guida Date: Tue Jul 2 18:05:14 2024 -0700 RISC-V: Add support for Zabha extension The Zabha extension adds support for subword Zaamo ops. Extension: https://github.com/riscv/riscv-zabha.git Ratification: https://jira.riscv.org/browse/RVS-1685 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::to_string): Skip zabha when not supported by the assembler. * config.in: Regenerate. * config/riscv/arch-canonicalize: Make zabha imply zaamo. * config/riscv/iterators.md (amobh): Add iterator for amo byte/halfword. * config/riscv/riscv.opt: Add zabha. * config/riscv/sync.md (atomic_): Add subword atomic op pattern. (zabha_atomic_fetch_): Add subword atomic_fetch op pattern. (lrsc_atomic_fetch_): Prefer zabha over lrsc for subword atomic ops. (zabha_atomic_exchange): Add subword atomic exchange pattern. (lrsc_atomic_exchange): Prefer zabha over lrsc for subword atomic exchange ops. * configure: Regenerate. * configure.ac: Add zabha assembler check. * doc/sourcebuild.texi: Add zabha documentation. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add zabha testsuite infra support. * gcc.target/riscv/amo/inline-atomics-1.c: Remove zabha to continue to test the lr/sc subword patterns. * gcc.target/riscv/amo/inline-atomics-2.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-subword-amo-add-char-acq-rel.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-subword-amo-add-char-acquire.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-subword-amo-add-char-relaxed.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-subword-amo-add-char-release.c: Ditto. * gcc.target/riscv/amo/zalrsc-rvwmo-subword-amo-add-char-seq-cst.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-subword-amo-add-char-acq-rel.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-subword-amo-add-char-acquire.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-subword-amo-add-char-relaxed.c: Ditto. 
* gcc.target/riscv/amo/zalrsc-ztso-subword-amo-add-char-release.c: Ditto. * gcc.target/riscv/amo/zalrsc-ztso-subword-amo-add-char-seq-cst.c: Ditto. * gcc.target/riscv/amo/zabha-all-amo-ops-char-run.c: New test. * gcc.target/riscv/amo/zabha-all-amo-ops-short-run.c: New test. * gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-char.c: New test. * gcc.target/riscv/amo/zabha-rvwmo-all-amo-ops-short.c: New test. * gcc.target/riscv/amo/zabha-rvwmo-amo-add-char.c: New test. * gcc.target/riscv/amo/zabha-rvwmo-amo-add-short.c: New test. * gcc.target/riscv/amo/zabha-ztso-amo-add-char.c: New test. * gcc.target/riscv/amo/zabha-ztso-amo-add-short.c: New test. Co-Authored-By: Patrick O'Neill Signed-Off-By: Gianluca Guida Tested-by: Andrea Parri Diff: --- gcc/common/config/riscv/riscv-common.cc| 12 gcc/config.in | 6 ++ gcc/config/riscv/arch-canonicalize | 3 + gcc/config/riscv/iterators.md | 3 + gcc/config/riscv/riscv.opt | 2 + gcc/config/riscv/sync.md | 81 +- gcc/configure | 31 + gcc/configure.ac | 5 ++ gcc/doc/sourcebuild.texi | 12 +++- .../gcc.target/riscv/amo/inline-atomics-1.c| 1 + .../gcc.target/riscv/amo/inline-atomics-2.c| 1 + .../riscv/amo/zabha-all-amo-ops-char-run.c | 5 ++ .../riscv/amo/zabha-all-amo-ops-short-run.c| 5 ++ .../riscv/amo/zabha-rvwmo-all-amo-ops-char.c | 23 ++ .../riscv/amo/zabha-rvwmo-all-amo-ops-short.c | 23 ++ .../riscv/amo/zabha-rvwmo-amo-add-char.c | 57 +++ .../riscv/amo/zabha-rvwmo-amo-add-short.c | 57 +++ .../gcc.target/riscv/amo/zabha-ztso-amo-add-char.c | 57 +++ .../riscv/amo/zabha-ztso-amo-add-short.c | 57 +++ .../zalrsc-rvwmo-subword-amo-add-char-acq-rel.c| 1 + .../zalrsc-rvwmo-subword-amo-add-char-acquire.c| 1 + .../zalrsc-rvwmo-subword-amo-add-char-relaxed.c| 1 + .../zalrsc-rvwmo-subword-amo-add-char-release.c| 1 + .../zalrsc-rvwmo-subword-amo-add-char-seq-cst.c| 1 + .../amo/zalrsc-ztso-subword-amo-add-char-acq-rel.c | 1 + .../amo/zalrsc-ztso-subword-amo-add-char-acquire.c | 1 + .../amo/
[gcc r15-1825] RISC-V: Describe -march behavior for dependent extensions
https://gcc.gnu.org/g:70f6bc39c4b0e147a816ad1dad583f944616c367 commit r15-1825-g70f6bc39c4b0e147a816ad1dad583f944616c367 Author: Palmer Dabbelt Date: Tue Jul 2 18:20:39 2024 -0700 RISC-V: Describe -march behavior for dependent extensions gcc/ChangeLog: * doc/invoke.texi: Describe -march behavior for dependent extensions on RISC-V. Diff: --- gcc/doc/invoke.texi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 68ebd79d676..b37c7af7a39 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31063,6 +31063,10 @@ If both @option{-march} and @option{-mcpu=} are not specified, the default for this argument is system dependent, users who want a specific architecture extensions should specify one explicitly. +When the RISC-V specifications define an extension as depending on other +extensions, GCC will implicitly add the dependent extensions to the enabled +extension set if they weren't added explicitly. + @opindex mcpu @item -mcpu=@var{processor-string} Use architecture of and optimize the output for the given processor, specified
[gcc r15-1826] Fortran: fix associate with assumed-length character array [PR115700]
https://gcc.gnu.org/g:7b7f203472d07a05d959a29638c7c95d98bf0c1c commit r15-1826-g7b7f203472d07a05d959a29638c7c95d98bf0c1c Author: Harald Anlauf Date: Tue Jul 2 21:26:05 2024 +0200 Fortran: fix associate with assumed-length character array [PR115700] gcc/fortran/ChangeLog: PR fortran/115700 * trans-stmt.cc (trans_associate_var): When the associate target is an array-valued character variable, the length is known at entry of the associate block. Move setting of string length of the selector to the initialization part of the block. gcc/testsuite/ChangeLog: PR fortran/115700 * gfortran.dg/associate_69.f90: New test. Diff: --- gcc/fortran/trans-stmt.cc | 18 gcc/testsuite/gfortran.dg/associate_69.f90 | 33 ++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index 60275e18867..703a705e7ca 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -1911,6 +1911,8 @@ trans_associate_var (gfc_symbol *sym, gfc_wrapped_block *block) gfc_se se; tree desc; bool cst_array_ctor; + stmtblock_t init; + gfc_init_block (&init); desc = sym->backend_decl; cst_array_ctor = e->expr_type == EXPR_ARRAY @@ -1935,10 +1937,17 @@ trans_associate_var (gfc_symbol *sym, gfc_wrapped_block *block) && !sym->attr.select_type_temporary && sym->ts.u.cl->backend_decl && VAR_P (sym->ts.u.cl->backend_decl) + && se.string_length && se.string_length != sym->ts.u.cl->backend_decl) - gfc_add_modify (&se.pre, sym->ts.u.cl->backend_decl, - fold_convert (TREE_TYPE (sym->ts.u.cl->backend_decl), - se.string_length)); + { + /* When the target is a variable, its length is already known. 
*/ + tree len = fold_convert (TREE_TYPE (sym->ts.u.cl->backend_decl), + se.string_length); + if (e->expr_type == EXPR_VARIABLE) + gfc_add_modify (&init, sym->ts.u.cl->backend_decl, len); + else + gfc_add_modify (&se.pre, sym->ts.u.cl->backend_decl, len); + } /* If we didn't already do the pointer assignment, set associate-name descriptor to the one generated for the temporary. */ @@ -1978,7 +1987,8 @@ trans_associate_var (gfc_symbol *sym, gfc_wrapped_block *block) } /* Done, register stuff as init / cleanup code. */ - gfc_add_init_cleanup (block, gfc_finish_block (&se.pre), + gfc_add_block_to_block (&init, &se.pre); + gfc_add_init_cleanup (block, gfc_finish_block (&init), gfc_finish_block (&se.post)); } diff --git a/gcc/testsuite/gfortran.dg/associate_69.f90 b/gcc/testsuite/gfortran.dg/associate_69.f90 new file mode 100644 index 000..28f488bb274 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/associate_69.f90 @@ -0,0 +1,33 @@ +! { dg-do compile } +! { dg-options "-Og -Wuninitialized -Wmaybe-uninitialized -fdump-tree-optimized" } +! +! PR fortran/115700 - Bogus warning for associate with assumed-length character array +! +subroutine mvce(x) + implicit none + character(len=*), dimension(:), intent(in) :: x + + associate (tmp1 => x) +if (len (tmp1) /= len (x)) stop 1 + end associate + + associate (tmp2 => x(1:)) +if (len (tmp2) /= len (x)) stop 2 + end associate + + associate (tmp3 => x(1:)(:)) +if (len (tmp3) /= len (x)) stop 3 + end associate + +! The following associate blocks still produce bogus warnings: + +! associate (tmp4 => x(:)(1:)) +! if (len (tmp4) /= len (x)) stop 4 +! end associate +! +! associate (tmp5 => x(1:)(1:)) +! if (len (tmp5) /= len (x)) stop 5 +! end associate +end + +! { dg-final { scan-tree-dump-not " \\.tmp" "optimized" } }
[gcc(refs/users/meissner/heads/work171)] Revert changes
https://gcc.gnu.org/g:6b86afaa700af66757f1d40b64e5f9e9e54e8d5c commit 6b86afaa700af66757f1d40b64e5f9e9e54e8d5c Author: Michael Meissner Date: Wed Jul 3 13:27:36 2024 -0400 Revert changes Diff: --- gcc/testsuite/gcc.target/powerpc/power11-1.c | 12 gcc/testsuite/gcc.target/powerpc/power11-2.c | 22 -- gcc/testsuite/gcc.target/powerpc/power11-3.c | 11 --- 3 files changed, 45 deletions(-) diff --git a/gcc/testsuite/gcc.target/powerpc/power11-1.c b/gcc/testsuite/gcc.target/powerpc/power11-1.c deleted file mode 100644 index a5aa32490df..000 --- a/gcc/testsuite/gcc.target/powerpc/power11-1.c +++ /dev/null @@ -1,12 +0,0 @@ -/* { dg-do assemble { target powerpc*-*-* } } */ -/* { dg-options "-mdejagnu-cpu=power11 -O2" } */ - -/* Basic check to see if the compiler supports -mcpu=power11. */ - -#ifndef _ARCH_PWR11 -#error "-mcpu=power11 is not supported" -#endif - -void foo (void) -{ -} diff --git a/gcc/testsuite/gcc.target/powerpc/power11-2.c b/gcc/testsuite/gcc.target/powerpc/power11-2.c deleted file mode 100644 index 4521c2a37c7..000 --- a/gcc/testsuite/gcc.target/powerpc/power11-2.c +++ /dev/null @@ -1,22 +0,0 @@ -/* Require VSX and Linux to eliminate systems where you can't do - __attribute__((__target__(...))). */ -/* { dg-do compile { target { powerpc*-*-linux* } } } */ -/* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2" } */ - -/* Check if we can set the power11 target via a target attribute. 
*/ - -__attribute__((__target__("cpu=power9"))) -void foo_p9 (void) -{ -} - -__attribute__((__target__("cpu=power10"))) -void foo_p10 (void) -{ -} - -__attribute__((__target__("cpu=power11"))) -void foo_p11 (void) -{ -} diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c b/gcc/testsuite/gcc.target/powerpc/power11-3.c deleted file mode 100644 index abf0c5866a9..000 --- a/gcc/testsuite/gcc.target/powerpc/power11-3.c +++ /dev/null @@ -1,11 +0,0 @@ -/* { dg-do compile { target powerpc*-*-* } } */ -/* Require VSX and Linux to eliminate systems where you can't do - __attribute__((__target_clones__(...))). */ -/* { dg-options "-mdejagnu-cpu=power8 -O2" } */ - -/* Check if we can set the power11 target via a target_clones attribute. */ - -__attribute__((__target_clones__("cpu=power11,cpu=power9,default"))) -void foo (void) -{ -}
[gcc(refs/users/meissner/heads/work171)] Add -mcpu=power11 tests.
https://gcc.gnu.org/g:58db14cbdb5ad06a1571499bce99c64610b712bf commit 58db14cbdb5ad06a1571499bce99c64610b712bf Author: Michael Meissner Date: Wed Jul 3 13:28:34 2024 -0400 Add -mcpu=power11 tests. This patch adds some simple tests for -mcpu=power11 support. 2024-07-03 Michael Meissner gcc/testsuite/ * gcc.target/powerpc/power11-1.c: New test. * gcc.target/powerpc/power11-2.c: Likewise. * gcc.target/powerpc/power11-3.c: Likewise. Diff: --- gcc/testsuite/gcc.target/powerpc/power11-1.c | 12 gcc/testsuite/gcc.target/powerpc/power11-2.c | 22 ++ gcc/testsuite/gcc.target/powerpc/power11-3.c | 11 +++ 3 files changed, 45 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/power11-1.c b/gcc/testsuite/gcc.target/powerpc/power11-1.c new file mode 100644 index 000..a1bd9538cba --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/power11-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-mdejagnu-cpu=power11 -O2" } */ + +/* Basic check to see if the compiler supports -mcpu=power11. */ + +#ifndef _ARCH_PWR11 +#error "-mcpu=power11 is not supported" +#endif + +void foo (void) +{ +} diff --git a/gcc/testsuite/gcc.target/powerpc/power11-2.c b/gcc/testsuite/gcc.target/powerpc/power11-2.c new file mode 100644 index 000..4521c2a37c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/power11-2.c @@ -0,0 +1,22 @@ +/* Require VSX and Linux to eliminate systems where you can't do + __attribute__((__target__(...))). */ +/* { dg-do compile { target { powerpc*-*-linux* } } } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2" } */ + +/* Check if we can set the power11 target via a target attribute. 
*/ + +__attribute__((__target__("cpu=power9"))) +void foo_p9 (void) +{ +} + +__attribute__((__target__("cpu=power10"))) +void foo_p10 (void) +{ +} + +__attribute__((__target__("cpu=power11"))) +void foo_p11 (void) +{ +} diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c b/gcc/testsuite/gcc.target/powerpc/power11-3.c new file mode 100644 index 000..abf0c5866a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/power11-3.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* Require VSX and Linux to eliminate systems where you can't do + __attribute__((__target_clones__(...))). */ +/* { dg-options "-mdejagnu-cpu=power8 -O2" } */ + +/* Check if we can set the power11 target via a target_clones attribute. */ + +__attribute__((__target_clones__("cpu=power11,cpu=power9,default"))) +void foo (void) +{ +}
[gcc(refs/users/meissner/heads/work171)] Update ChangeLog.*
https://gcc.gnu.org/g:a80d1d3d96c859779f55c5d9593edccc42e1dc96 commit a80d1d3d96c859779f55c5d9593edccc42e1dc96 Author: Michael Meissner Date: Wed Jul 3 13:31:17 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 781999c4e75..8ec458af54a 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -4,7 +4,7 @@ Add -mcpu=future tuning support. This patch makes -mtune=future use the same tuning decision as -mtune=power11. -2024-06-17 Michael Meissner +2024-07-03 Michael Meissner gcc/ @@ -30,7 +30,7 @@ This patch allows GCC to be configured with the --with-cpu=future and This patch passes -mfuture to the assembler if the user uses -mcpu=future. -2024-06-17 Michael Meissner +2024-07-03 Michael Meissner gcc/ @@ -70,15 +70,13 @@ This patch adds some simple tests for -mcpu=power11 support. In order to run these tests, you need an assembler that supports the appropriate option for supporting the Power11 processor (-mpower11 under Linux or -mpwr11 under AIX). -2024-06-17 Michael Meissner +2024-07-03 Michael Meissner gcc/testsuite/ * gcc.target/powerpc/power11-1.c: New test. * gcc.target/powerpc/power11-2.c: Likewise. * gcc.target/powerpc/power11-3.c: Likewise. - * lib/target-supports.exp (check_effective_target_power11_ok): Add new - effective target. Branch work171, patch #2 @@ -86,7 +84,7 @@ Add -mcpu=power11 tuning support. This patch makes -mtune=power11 use the same tuning decisions as -mtune=power10. -2024-06-17 Michael Meissner +2024-07-02 Michael Meissner gcc/ @@ -115,7 +113,7 @@ This patch passes -mpwr11 to the assembler if the user uses -mcpu=power11. This patch adds support for using "power11" in the __builtin_cpu_is built-in function. -2024-06-17 Michael Meissner +2024-07-03 Michael Meissner gcc/
[gcc/meissner/heads/work171-bugs] (5 commits) Merge commit 'refs/users/meissner/heads/work171-bugs' of gi
The branch 'meissner/heads/work171-bugs' was updated to point to: cb841ff9503... Merge commit 'refs/users/meissner/heads/work171-bugs' of gi It previously pointed to: e972f29adad... Merge commit 'refs/users/meissner/heads/work171-bugs' of gi Diff: Summary of changes (added commits): --- cb841ff... Merge commit 'refs/users/meissner/heads/work171-bugs' of gi df3392b... Add ChangeLog.bugs and update REVISION. a80d1d3... Update ChangeLog.* (*) 58db14c... Add -mcpu=power11 tests. (*) 6b86afa... Revert changes (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work171-bugs' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work171-bugs)] Add ChangeLog.bugs and update REVISION.
https://gcc.gnu.org/g:df3392b59a5a9c1cc6ebf03ee910d6c011758c0a commit df3392b59a5a9c1cc6ebf03ee910d6c011758c0a Author: Michael Meissner Date: Fri Jun 28 15:07:19 2024 -0400 Add ChangeLog.bugs and update REVISION. 2024-06-28 Michael Meissner gcc/ * ChangeLog.bugs: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.bugs | 6 ++++++ gcc/REVISION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs new file mode 100644 index 00000000000..d2c5d2ab118 --- /dev/null +++ b/gcc/ChangeLog.bugs @@ -0,0 +1,6 @@ + Branch work171-bugs, baseline + +2024-06-28 Michael Meissner + + Clone branch + diff --git a/gcc/REVISION b/gcc/REVISION index 060d0f9e831..7a2e248f4d4 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work171 branch +work171-bugs branch
[gcc(refs/users/meissner/heads/work171-bugs)] Merge commit 'refs/users/meissner/heads/work171-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work1
https://gcc.gnu.org/g:cb841ff9503005daeae92ceba50a83b7ce709210 commit cb841ff9503005daeae92ceba50a83b7ce709210 Merge: df3392b59a5 e972f29adad Author: Michael Meissner Date: Wed Jul 3 13:34:59 2024 -0400 Merge commit 'refs/users/meissner/heads/work171-bugs' of git+ssh://gcc.gnu.org/git/gcc into me/work171-bugs Diff:
[gcc/meissner/heads/work171-dmf] (5 commits) Merge commit 'refs/users/meissner/heads/work171-dmf' of git
The branch 'meissner/heads/work171-dmf' was updated to point to: 2d54858a7bb... Merge commit 'refs/users/meissner/heads/work171-dmf' of git It previously pointed to: 0afd530ede7... Merge commit 'refs/users/meissner/heads/work171-dmf' of git Diff: Summary of changes (added commits): --- 2d54858... Merge commit 'refs/users/meissner/heads/work171-dmf' of git d9c3752... Add ChangeLog.dmf and update REVISION. a80d1d3... Update ChangeLog.* (*) 58db14c... Add -mcpu=power11 tests. (*) 6b86afa... Revert changes (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work171-dmf' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work171-dmf)] Add ChangeLog.dmf and update REVISION.
https://gcc.gnu.org/g:d9c3752bb16abcaa2ac4527d7df81f802b31234b commit d9c3752bb16abcaa2ac4527d7df81f802b31234b Author: Michael Meissner Date: Fri Jun 28 15:04:38 2024 -0400 Add ChangeLog.dmf and update REVISION. 2024-06-28 Michael Meissner gcc/ * ChangeLog.dmf: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.dmf | 6 ++++++ gcc/REVISION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf new file mode 100644 index 00000000000..bf4edca66c7 --- /dev/null +++ b/gcc/ChangeLog.dmf @@ -0,0 +1,6 @@ + Branch work171-dmf, baseline + +2024-06-28 Michael Meissner + + Clone branch + diff --git a/gcc/REVISION b/gcc/REVISION index 060d0f9e831..e8fb95125d2 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work171 branch +work171-dmf branch
[gcc(refs/users/meissner/heads/work171-dmf)] Merge commit 'refs/users/meissner/heads/work171-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work17
https://gcc.gnu.org/g:2d54858a7bb66c4acbb68dfe109bb761a98be60f commit 2d54858a7bb66c4acbb68dfe109bb761a98be60f Merge: d9c3752bb16 0afd530ede7 Author: Michael Meissner Date: Wed Jul 3 13:39:23 2024 -0400 Merge commit 'refs/users/meissner/heads/work171-dmf' of git+ssh://gcc.gnu.org/git/gcc into me/work171-dmf Diff:
[gcc/meissner/heads/work171-tar] (5 commits) Merge commit 'refs/users/meissner/heads/work171-tar' of git
The branch 'meissner/heads/work171-tar' was updated to point to: 22640e2b601... Merge commit 'refs/users/meissner/heads/work171-tar' of git It previously pointed to: e2490ea0551... Merge commit 'refs/users/meissner/heads/work171-tar' of git Diff: Summary of changes (added commits): --- 22640e2... Merge commit 'refs/users/meissner/heads/work171-tar' of git 220389c... Add ChangeLog.tar and update REVISION. a80d1d3... Update ChangeLog.* (*) 58db14c... Add -mcpu=power11 tests. (*) 6b86afa... Revert changes (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work171-tar' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work171-tar)] Add ChangeLog.tar and update REVISION.
https://gcc.gnu.org/g:220389c2c09c3fc3819db5fb825fb30355d9fc69 commit 220389c2c09c3fc3819db5fb825fb30355d9fc69 Author: Michael Meissner Date: Fri Jun 28 15:06:24 2024 -0400 Add ChangeLog.tar and update REVISION. 2024-06-28 Michael Meissner gcc/ * ChangeLog.tar: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.tar | 6 ++++++ gcc/REVISION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar new file mode 100644 index 00000000000..dbdff04e2fa --- /dev/null +++ b/gcc/ChangeLog.tar @@ -0,0 +1,6 @@ + Branch work171-tar, baseline + +2024-06-28 Michael Meissner + + Clone branch + diff --git a/gcc/REVISION b/gcc/REVISION index 060d0f9e831..da2e6621959 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work171 branch +work171-tar branch
[gcc(refs/users/meissner/heads/work171-tar)] Merge commit 'refs/users/meissner/heads/work171-tar' of git+ssh://gcc.gnu.org/git/gcc into me/work17
https://gcc.gnu.org/g:22640e2b601dcb70040fa7d8118e82bad3166de4 commit 22640e2b601dcb70040fa7d8118e82bad3166de4 Merge: 220389c2c09 e2490ea0551 Author: Michael Meissner Date: Wed Jul 3 13:41:06 2024 -0400 Merge commit 'refs/users/meissner/heads/work171-tar' of git+ssh://gcc.gnu.org/git/gcc into me/work171-tar Diff:
[gcc/meissner/heads/work171-test] (5 commits) Merge commit 'refs/users/meissner/heads/work171-test' of gi
The branch 'meissner/heads/work171-test' was updated to point to: 3e791e9d1f1... Merge commit 'refs/users/meissner/heads/work171-test' of gi It previously pointed to: 16f55933794... Merge commit 'refs/users/meissner/heads/work171-test' of gi Diff: Summary of changes (added commits): --- 3e791e9... Merge commit 'refs/users/meissner/heads/work171-test' of gi 9b8cccf... Add ChangeLog.test and update REVISION. a80d1d3... Update ChangeLog.* (*) 58db14c... Add -mcpu=power11 tests. (*) 6b86afa... Revert changes (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work171-test' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work171-test)] Add ChangeLog.test and update REVISION.
https://gcc.gnu.org/g:9b8cccfe5e7a9d49509708f67738e204e9c4491a commit 9b8cccfe5e7a9d49509708f67738e204e9c4491a Author: Michael Meissner Date: Fri Jun 28 15:08:09 2024 -0400 Add ChangeLog.test and update REVISION. 2024-06-28 Michael Meissner gcc/ * ChangeLog.test: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.test | 6 ++++++ gcc/REVISION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.test b/gcc/ChangeLog.test new file mode 100644 index 00000000000..c2893a28120 --- /dev/null +++ b/gcc/ChangeLog.test @@ -0,0 +1,6 @@ + Branch work171-test, baseline + +2024-06-28 Michael Meissner + + Clone branch + diff --git a/gcc/REVISION b/gcc/REVISION index 060d0f9e831..78d94f8c4ae 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work171 branch +work171-test branch
[gcc(refs/users/meissner/heads/work171-test)] Merge commit 'refs/users/meissner/heads/work171-test' of git+ssh://gcc.gnu.org/git/gcc into me/work1
https://gcc.gnu.org/g:3e791e9d1f1399e4dadb8b32c9d41bd6f2809a76 commit 3e791e9d1f1399e4dadb8b32c9d41bd6f2809a76 Merge: 9b8cccfe5e7 16f55933794 Author: Michael Meissner Date: Wed Jul 3 13:42:50 2024 -0400 Merge commit 'refs/users/meissner/heads/work171-test' of git+ssh://gcc.gnu.org/git/gcc into me/work171-test Diff:
[gcc/meissner/heads/work171-vpair] (5 commits) Merge commit 'refs/users/meissner/heads/work171-vpair' of g
The branch 'meissner/heads/work171-vpair' was updated to point to: d25568d1da5... Merge commit 'refs/users/meissner/heads/work171-vpair' of g It previously pointed to: e073a26637a... Merge commit 'refs/users/meissner/heads/work171-vpair' of g Diff: Summary of changes (added commits): --- d25568d... Merge commit 'refs/users/meissner/heads/work171-vpair' of g d366bcb... Add ChangeLog.vpair and update REVISION. a80d1d3... Update ChangeLog.* (*) 58db14c... Add -mcpu=power11 tests. (*) 6b86afa... Revert changes (*) (*) This commit already exists in another branch. Because the reference `refs/users/meissner/heads/work171-vpair' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/meissner/heads/work171-vpair)] Add ChangeLog.vpair and update REVISION.
https://gcc.gnu.org/g:d366bcb543d604c108a6f18c135863c35cb7db7c commit d366bcb543d604c108a6f18c135863c35cb7db7c Author: Michael Meissner Date: Fri Jun 28 15:05:36 2024 -0400 Add ChangeLog.vpair and update REVISION. 2024-06-28 Michael Meissner gcc/ * ChangeLog.vpair: New file for branch. * REVISION: Update. Diff: --- gcc/ChangeLog.vpair | 6 ++++++ gcc/REVISION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair new file mode 100644 index 00000000000..eb6dd427796 --- /dev/null +++ b/gcc/ChangeLog.vpair @@ -0,0 +1,6 @@ + Branch work171-vpair, baseline + +2024-06-28 Michael Meissner + + Clone branch + diff --git a/gcc/REVISION b/gcc/REVISION index 060d0f9e831..6cef7066a80 100644 --- a/gcc/REVISION +++ b/gcc/REVISION @@ -1 +1 @@ -work171 branch +work171-vpair branch
[gcc(refs/users/meissner/heads/work171-vpair)] Merge commit 'refs/users/meissner/heads/work171-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work
https://gcc.gnu.org/g:d25568d1da54417f4aebea618d4ef27dab8325a6 commit d25568d1da54417f4aebea618d4ef27dab8325a6 Merge: d366bcb543d e073a26637a Author: Michael Meissner Date: Wed Jul 3 13:44:06 2024 -0400 Merge commit 'refs/users/meissner/heads/work171-vpair' of git+ssh://gcc.gnu.org/git/gcc into me/work171-vpair Diff:
[gcc r14-10375] hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns
https://gcc.gnu.org/g:acde9f81da39450b90e12ccf937d35aa8da1b478 commit r14-10375-gacde9f81da39450b90e12ccf937d35aa8da1b478 Author: John David Anglin Date: Sun Jun 30 09:48:21 2024 -0400 hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns 2024-06-30 John David Anglin gcc/ChangeLog: PR target/115691 * config/pa/pa.md: Remove incorrect xmpyu patterns. Diff: --- gcc/config/pa/pa.md | 18 -- 1 file changed, 18 deletions(-) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index b0f29a44bae..9e410f43052 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -5503,24 +5503,6 @@ [(set_attr "type" "fpmuldbl") (set_attr "length" "4")]) -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && !TARGET_64BIT" - "xmpyu %1,%R2,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && TARGET_64BIT" - "xmpyu %1,%2R,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - (define_insn "" [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) (clobber (match_operand:SI 0 "register_operand" "=a"))
[gcc r13-8888] hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns
https://gcc.gnu.org/g:ecd6ebe5fb0151f9649705a5798325032bbc811a commit r13-8888-gecd6ebe5fb0151f9649705a5798325032bbc811a Author: John David Anglin Date: Sun Jun 30 09:48:21 2024 -0400 hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns 2024-06-30 John David Anglin gcc/ChangeLog: PR target/115691 * config/pa/pa.md: Remove incorrect xmpyu patterns. Diff: --- gcc/config/pa/pa.md | 18 -- 1 file changed, 18 deletions(-) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 36d20576102..d832a29683c 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -5493,24 +5493,6 @@ [(set_attr "type" "fpmuldbl") (set_attr "length" "4")]) -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && !TARGET_64BIT" - "xmpyu %1,%R2,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && TARGET_64BIT" - "xmpyu %1,%2R,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - (define_insn "" [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) (clobber (match_operand:SI 0 "register_operand" "=a"))
[gcc r12-10597] hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns
https://gcc.gnu.org/g:0c98d9479cec88148eb3be8d0098e36bce061cd6 commit r12-10597-g0c98d9479cec88148eb3be8d0098e36bce061cd6 Author: John David Anglin Date: Sun Jun 30 09:48:21 2024 -0400 hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns 2024-06-30 John David Anglin gcc/ChangeLog: PR target/115691 * config/pa/pa.md: Remove incorrect xmpyu patterns. Diff: --- gcc/config/pa/pa.md | 18 -- 1 file changed, 18 deletions(-) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index d82f168c8a3..43241958722 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -5493,24 +5493,6 @@ [(set_attr "type" "fpmuldbl") (set_attr "length" "4")]) -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && !TARGET_64BIT" - "xmpyu %1,%R2,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && TARGET_64BIT" - "xmpyu %1,%2R,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - (define_insn "" [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) (clobber (match_operand:SI 0 "register_operand" "=a"))
[gcc r11-11555] hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns
https://gcc.gnu.org/g:ce713016fb50796e906e39ba4244fbaf47ae77a9 commit r11-11555-gce713016fb50796e906e39ba4244fbaf47ae77a9 Author: John David Anglin Date: Sun Jun 30 09:48:21 2024 -0400 hppa: Fix ICE caused by mismatched predicate and constraint in xmpyu patterns 2024-06-30 John David Anglin gcc/ChangeLog: PR target/115691 * config/pa/pa.md: Remove incorrect xmpyu patterns. Diff: --- gcc/config/pa/pa.md | 18 -- 1 file changed, 18 deletions(-) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index b252486fa94..072e62455d8 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -5493,24 +5493,6 @@ [(set_attr "type" "fpmuldbl") (set_attr "length" "4")]) -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && !TARGET_64BIT" - "xmpyu %1,%R2,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=f") - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "f")) -(match_operand:DI 2 "uint32_operand" "f")))] - "TARGET_PA_11 && ! TARGET_SOFT_FLOAT && ! TARGET_SOFT_MULT && TARGET_64BIT" - "xmpyu %1,%2R,%0" - [(set_attr "type" "fpmuldbl") - (set_attr "length" "4")]) - (define_insn "" [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25))) (clobber (match_operand:SI 0 "register_operand" "=a"))
[gcc r15-1827] Revert "Delete MALLOC_ABI_ALIGNMENT define from pa32-linux.h"
https://gcc.gnu.org/g:ad2206d576603c94b0c1778c84b7f43fbf8a13b4 commit r15-1827-gad2206d576603c94b0c1778c84b7f43fbf8a13b4 Author: John David Anglin Date: Wed Jul 3 14:34:47 2024 -0400 Revert "Delete MALLOC_ABI_ALIGNMENT define from pa32-linux.h" This reverts commit 0ee3266b3dec4d984d43c79e2b3e649256e3eaaa. Diff: --- gcc/config/pa/pa32-linux.h | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h index 63abba26deb..187ae62b0f8 100644 --- a/gcc/config/pa/pa32-linux.h +++ b/gcc/config/pa/pa32-linux.h @@ -68,6 +68,11 @@ call_ ## FUNC (void) \ #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE BITS_PER_WORD +/* POSIX types such as pthread_mutex_t require 16-byte alignment to retain + layout compatibility with the original linux thread implementation. */ +#undef MALLOC_ABI_ALIGNMENT +#define MALLOC_ABI_ALIGNMENT 128 + /* Place jump tables in the text section except when generating non-PIC code. When generating non-PIC code, the relocations needed to load the address of the jump table result in a text label in the final executable
[gcc r14-10376] Revert "Delete MALLOC_ABI_ALIGNMENT define from pa32-linux.h"
https://gcc.gnu.org/g:6e1fb1f9db3b722598a7332b92f4470a7bbc9c95 commit r14-10376-g6e1fb1f9db3b722598a7332b92f4470a7bbc9c95 Author: John David Anglin Date: Wed Jul 3 14:34:47 2024 -0400 Revert "Delete MALLOC_ABI_ALIGNMENT define from pa32-linux.h" This reverts commit 0ee3266b3dec4d984d43c79e2b3e649256e3eaaa. Diff: --- gcc/config/pa/pa32-linux.h | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h index 63abba26deb..187ae62b0f8 100644 --- a/gcc/config/pa/pa32-linux.h +++ b/gcc/config/pa/pa32-linux.h @@ -68,6 +68,11 @@ call_ ## FUNC (void) \ #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE BITS_PER_WORD +/* POSIX types such as pthread_mutex_t require 16-byte alignment to retain + layout compatibility with the original linux thread implementation. */ +#undef MALLOC_ABI_ALIGNMENT +#define MALLOC_ABI_ALIGNMENT 128 + /* Place jump tables in the text section except when generating non-PIC code. When generating non-PIC code, the relocations needed to load the address of the jump table result in a text label in the final executable
[gcc r15-1828] [committed] Fix previously latent bug in reorg affecting cris port
https://gcc.gnu.org/g:e5f73853ae78d4e9ae434c707a12da1494459b24 commit r15-1828-ge5f73853ae78d4e9ae434c707a12da1494459b24 Author: Jeff Law Date: Wed Jul 3 12:47:31 2024 -0600 [committed] Fix previously latent bug in reorg affecting cris port The late-combine patch has triggered a previously latent bug in reorg. Basically we have a sequence like this in the middle of reorg before we start relaxing delay slots (cris-elf, gcc.dg/torture/pr98289.c) > (insn 67 49 18 (sequence [ > (jump_insn 50 49 52 (set (pc) > (if_then_else (ne (reg:CC 19 ccr) > (const_int 0 [0])) > (label_ref:SI 30) > (pc))) "j.c":10:6 discrim 1 282 {*bnecc} > (expr_list:REG_DEAD (reg:CC 19 ccr) > (int_list:REG_BR_PROB 7 (nil))) > -> 30) > (insn/f 52 50 18 (set (mem:SI (reg/f:SI 14 sp) [1 S4 A8]) > (reg:SI 16 srp)) 37 {*mov_tomemsi} > (nil)) > ]) "j.c":10:6 discrim 1 -1 > (nil)) > > (note 18 67 54 [bb 3] NOTE_INSN_BASIC_BLOCK) > > (note 54 18 55 NOTE_INSN_EPILOGUE_BEG) > > (jump_insn 55 54 56 (return) "j.c":14:1 228 {*return_expanded} > (nil) > -> return) > > (barrier 56 55 43) > > (note 43 56 65 [bb 4] NOTE_INSN_BASIC_BLOCK) > > (note 65 43 30 NOTE_INSN_SWITCH_TEXT_SECTIONS) > > (code_label 30 65 8 5 6 (nil) [1 uses]) > > (note 8 30 61 [bb 5] NOTE_INSN_BASIC_BLOCK) So at a high level the things to note are that insn 50 conditionally jumps around insn 55. Second there's a SWITCH_TEXT_SECTIONS note between insn 50 and the target label for insn 50 (code_label 30). reorg sees the conditional jump around the unconditional jump/return and will invert the jump and retarget the original jump to an appropriate location. 
In this case generating: > (insn 67 49 18 (sequence [ > (jump_insn 50 49 52 (set (pc) > (if_then_else (eq (reg:CC 19 ccr) > (const_int 0 [0])) > (label_ref:SI 68) > (pc))) "j.c":10:6 discrim 1 281 {*beqcc} > (expr_list:REG_DEAD (reg:CC 19 ccr) > (int_list:REG_BR_PROB 1073741831 (nil))) > -> 68) > (insn/s/f 52 50 18 (set (mem:SI (reg/f:SI 14 sp) [1 S4 A8]) > (reg:SI 16 srp)) 37 {*mov_tomemsi} > (nil)) > ]) "j.c":10:6 discrim 1 -1 > (nil)) > > (note 18 67 54 [bb 3] NOTE_INSN_BASIC_BLOCK) > > (note 54 18 43 NOTE_INSN_EPILOGUE_BEG) > > (note 43 54 65 [bb 4] NOTE_INSN_BASIC_BLOCK) > > (note 65 43 8 NOTE_INSN_SWITCH_TEXT_SECTIONS) > > (note 8 65 61 [bb 5] NOTE_INSN_BASIC_BLOCK) [ ... ] Where the new target of the jump is a return statement later in the IL. Note that we now have a SWITCH_TEXT_SECTIONS note that is not immediately preceded by a BARRIER. That triggers an assertion in the dwarf2 code. Removal of the BARRIER is inherent in this optimization. The fix is simple, we avoid this optimization when there's a SWITCH_TEXT_SECTIONS note between the conditional jump insn and its target. Thankfully we already have a routine to test for this in reorg, so we just need to call it appropriately. The other approach would be to drop the note which I considered and discarded. We don't have great coverage for delay slot targets. I've tested arc, cris, fr30, frv, h8, iq2000, microblaze, or1k, sh3 visium in my tester as crosses without new regressions, fixing one regression along the way. Bootstrap & regression testing on sh4 and hppa will take considerably longer. gcc/ * reorg.cc (relax_delay_slots): Do not optimize a conditional jump around an unconditional jump/return in the presence of a text section switch. 
Diff: --- gcc/reorg.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/reorg.cc b/gcc/reorg.cc index 99228a22c69..633099ca765 100644 --- a/gcc/reorg.cc +++ b/gcc/reorg.cc @@ -3409,7 +3409,8 @@ relax_delay_slots (rtx_insn *first) && next && simplejump_or_return_p (next) && (next_active_insn (as_a (target_label)) == next_active_insn (next)) - && no_labels_between_p (insn, next)) + && no_labels_between_p (insn, next) + && !switch_text_sections_between_p (insn, next_active_insn (next))) { rtx label = JUMP_LABEL (next); rtx old_label = JUMP_LABEL (delay_jump_insn);
[gcc(refs/users/meissner/heads/work171-tar)] Add support for the TAR register.
https://gcc.gnu.org/g:20ea7e93905d1b23e0507fdb7e4aad8bd2aa9419 commit 20ea7e93905d1b23e0507fdb7e4aad8bd2aa9419 Author: Michael Meissner Date: Wed Jul 3 16:02:29 2024 -0400 Add support for the TAR register. 2024-07-03 Michael Meissner gcc/ * config/rs6000/constraints.md (h constraint): Add TAR register to the documentation. (wt constraint): New constraint. * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support. (alt_reg_names): Likewise. (rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only hold scalar integer modes of an appropriate size. Add TAR register support. (rs6000_debug_reg_global): Print the register class that wt maps to. (rs6000_init_hard_regno_mode_ok): Add TAR register support. (rs6000_conditional_register_usage): Add TAR register support. (print_operand): Likewise. (rs6000_debugger_regno): Likewise. (rs6000_opt_masks): Add support for -mtar. * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register support. (FIXED_REGISTERS): Likewise. (CALL_REALLY_USED_REGISTERS): Likewise. (REG_ALLOC_ORDER): Likewise. (enum reg_class): Likewise. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. (enum r6000_reg_class_enum): Add support for the wt constraint. * config/rs6000/rs6000.md (TAR_REGNO): New constant. (call_indirect_nonlocal_sysv): Likewise. (call_value_indirect_nonlocal_sysv): Likewise. (call_indirect_aix): Likewise. (call_value_indirect_aix): Likewise. (call_indirect_elfv2): Likewise. (call_indirect_pcrel): Likewise. (call_value_indirect_elfv2): Likewise. (call_value_indirect_pcrel): Likewise. (*sibcall_indirect_nonlocal_sysv): Likewise. (sibcall_value_indirect_nonlocal_sysv): Likewise. (indirect_jump): Likewise. (@indirect_jump_nospec): Likewise. (@tablejump_insn_normal): Likewise. (@tablejump_insn_nospec): Likewise. * config/rs6000/rs6000.opt (-mtar): New option. 
gcc/testsuite/ * gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register. * gcc.target/powerpc/pr51513.c: Likewise. * gcc.target/powerpc/safe-indirect-jump-2.c: Likewise. * gcc.target/powerpc/safe-indirect-jump-3.c: Likewise. * gcc.target/powerpc/tar-register.c: New test. Diff: --- gcc/config/rs6000/constraints.md | 5 +- gcc/config/rs6000/rs6000-cpus.def | 4 +- gcc/config/rs6000/rs6000.cc| 58 +++--- gcc/config/rs6000/rs6000.h | 31 +++- gcc/config/rs6000/rs6000.md| 33 ++-- gcc/config/rs6000/rs6000.opt | 4 ++ gcc/testsuite/gcc.target/powerpc/ppc-switch-1.c| 4 +- gcc/testsuite/gcc.target/powerpc/pr51513.c | 4 +- .../gcc.target/powerpc/safe-indirect-jump-2.c | 2 +- .../gcc.target/powerpc/safe-indirect-jump-3.c | 2 +- gcc/testsuite/gcc.target/powerpc/tar-register.c| 34 + 11 files changed, 138 insertions(+), 43 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 369a7b75042..14f0465d7ae 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -57,7 +57,7 @@ "@internal A compatibility alias for @code{wa}.") (define_register_constraint "h" "SPECIAL_REGS" - "@internal A special register (@code{vrsave}, @code{ctr}, or @code{lr}).") + "@internal A special register (@code{vrsave}, @code{ctr}, @code{lr} or @code{tar}).") (define_register_constraint "c" "CTR_REGS" "The count register, @code{ctr}.") @@ -91,6 +91,9 @@ "@internal Like @code{r}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}.") +(define_register_constraint "wt" "rs6000_constraints[RS6000_CONSTRAINT_wt]" + "The tar register, @code{tar}.") + (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]" "@internal Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}.") diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index d625dbeb91f..4c0b5ca8cb8 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -65,7 
+65,8 @@ | OPTION_MASK_MODULO \ | OPTION_MASK_P9_MINMAX \ |
[gcc(refs/users/meissner/heads/work171-tar)] Update ChangeLog.*
https://gcc.gnu.org/g:ec76bc555a845c118fb866ab0cedb6a43b5bd064 commit ec76bc555a845c118fb866ab0cedb6a43b5bd064 Author: Michael Meissner Date: Wed Jul 3 16:04:01 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.tar | 37 - 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar index dbdff04e2fa..797d13695e8 100644 --- a/gcc/ChangeLog.tar +++ b/gcc/ChangeLog.tar @@ -1,6 +1,41 @@ + Branch work171-tar, patch #200 + +Restrict SPR to appropriate integer modes. + +In preparation for the patches to add support for the TAR register, I restricted +the modes that special purpose registers (SPRs) could hold to be appropriate +sized scalar integers. I have discovered occasionally when GCC has run out of +registers, it will use the SPRs to hold values instead of spilling them to the +stack. The LR/CTR registers can hold 8/16/32-bit values and on 64-bit systems, +they can also hold 64-bit values. The VRSAVE and VSCR registers can only hold +32-bit values. + +2024-06-20 Michael Meissner + +gcc/ + + * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Restrict + SPR registers to only hold scalar integer modes of an appropriate size. + * config/rs6000/rs6000.md (movcc_): Remove alternatives that move + values to/from the SPRs. + (movsf_hardfloat): Likewise. + (movsd_hardfloat): Likewise. + (mov_softfloat): Likewise. + (mov_softfloat32): Likewise. + (mov_hardfloat64): Likewise. + (*mov_softfloat64): Likewise. + Branch work171-tar, baseline +Add ChangeLog.tar and update REVISION. + +2024-06-17 Michael Meissner + +gcc/ + + * ChangeLog.tar: New file for branch. + * REVISION: Update. + 2024-06-28 Michael Meissner Clone branch -
[gcc(refs/users/meissner/heads/work171-tar)] Remove SPR alternatives for move insns.
https://gcc.gnu.org/g:dc64c11d40e8a57a665d61eeb5ab50ad5bee1513 commit dc64c11d40e8a57a665d61eeb5ab50ad5bee1513 Author: Michael Meissner Date: Wed Jul 3 19:10:09 2024 -0400 Remove SPR alternatives for move insns. 2024-07-03 Michael Meissner * config/rs6000/rs6000.md (mov_internal): Remove alternatives for moving values to/from SPR registers. (movcc_): Likewise. (movsf_hardfloat): Likewise. (movsd_hardfloat): Likewise. (mov_softfloat): Likewise. (mov_hardfloat64): Likewise. (mov_softfloat64): Likewise. Diff: --- gcc/config/rs6000/rs6000.md | 114 +--- 1 file changed, 44 insertions(+), 70 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 90c3785e7af..8d7f3445c46 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -8064,16 +8064,16 @@ ;; MR LHZ/LBZLXSI*ZXSTH/STBSTXSI*XLI ;; XXLOR load 0 load -1VSPLTI*# MFVSRWZ -;; MTVSRWZ MF%1 MT%1 NOP +;; MTVSRWZ (define_insn "*mov_internal" [(set (match_operand:QHI 0 "nonimmediate_operand" "=r,r, wa,m, ?Z,r, wa,wa,wa,v, ?v,r, -wa,r, *c*l, *h") +wa") (match_operand:QHI 1 "input_operand" "r, m, ?Z,r, wa,i, wa,O, wM,wB,wS,wa, -r, *h,r, 0"))] +r"))] "gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)" "@ @@ -8089,22 +8089,19 @@ vspltis %0,%1 # mfvsrwz %0,%x1 - mtvsrwz %x0,%1 - mf%1 %0 - mt%0 %1 - nop" + mtvsrwz %x0,%1" [(set_attr "type" "*, load, fpload,store, fpstore, *, vecsimple, vecperm, vecperm, vecperm, vecperm, mfvsr, -mtvsr, mfjmpr,mtjmpr,*") +mtvsr") (set_attr "length" "*, *, *, *, *, *, *, *, *, *, 8, *, -*, *, *, *") +*") (set_attr "isa" "*, *, p9v, *, p9v, *, p9v, p9v, p9v, p9v, p9v, p9v, -p9v, *, *, *")]) +p9v")]) ;; Here is how to move condition codes around. 
When we store CC data in @@ -8120,9 +8117,9 @@ (define_insn "*movcc_" [(set (match_operand:CC_any 0 "nonimmediate_operand" - "=y,x,?y,y,r,r,r,r, r,*c*l,r,m") + "=y,x,?y,y,r,r,r,r,r,m") (match_operand:CC_any 1 "general_operand" - " y,r, r,O,x,y,r,I,*h, r,m,r"))] + " y,r, r,O,x,y,r,I,m,r"))] "register_operand (operands[0], mode) || register_operand (operands[1], mode)" "@ @@ -8134,8 +8131,6 @@ mfcr %0%Q1\;rlwinm %0,%0,%f1,0xf000 mr %0,%1 li %0,%1 - mf%1 %0 - mt%0 %1 lwz%U1%X1 %0,%1 stw%U0%X0 %1,%0" [(set_attr_alternative "type" @@ -8149,11 +8144,9 @@ (const_string "mfcrf") (const_string "mfcr")) (const_string "integer") (const_string "integer") - (const_string "mfjmpr") - (const_string "mtjmpr") (const_string "load") (const_string "store")]) - (set_attr "length" "*,*,12,*,*,8,*,*,*,*,*,*")]) + (set_attr "length" "*,*,12,*,*,8,*,*,*,*")]) ;; For floating-point, we normally deal with the floating-point registers ;; unless -msoft-float is used. The sole exception is that parameter passing @@ -8204,17 +8197,17 @@ ;; ;; LWZ LFSLXSSP LXSSPX STFS STXSSP ;; STXSSPX STWXXLXOR LI FMRXSCPSGNDP -;; MR MT MF NOPXXSPLTIDP +;; MR XXSPLTIDP (define_insn "movsf_hardfloat" [(set (match_operand:SF 0 "nonimmediate_operand" "=!r, f, v, wa,m, wY, Z, m, wa, !r,f, wa, - !r,*c*l, !r, *h,wa") + !r,wa") (match_operand:SF 1 "input_operand" "m, m, wY, Z, f, v, wa,r, j, j, f, wa, - r, r, *h, 0, eP"))] + r, eP"))] "(register_operand (operands[0], SFmode) || register_operand (ope
[gcc(refs/users/meissner/heads/work171-tar)] Update ChangeLog.*
https://gcc.gnu.org/g:241891477aab13135aa49e1441abbe6eee44afbb commit 241891477aab13135aa49e1441abbe6eee44afbb Author: Michael Meissner Date: Wed Jul 3 19:11:18 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.tar | 15 +++ 1 file changed, 15 insertions(+) diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar index 797d13695e8..058a06886ea 100644 --- a/gcc/ChangeLog.tar +++ b/gcc/ChangeLog.tar @@ -1,3 +1,18 @@ + Branch work171-tar, patch #201 + +Remove SPR alternatives for move insns. + +2024-07-03 Michael Meissner + + * config/rs6000/rs6000.md (mov_internal): Remove alternatives for + moving values to/from SPR registers. + (movcc_): Likewise. + (movsf_hardfloat): Likewise. + (movsd_hardfloat): Likewise. + (mov_softfloat): Likewise. + (mov_hardfloat64): Likewise. + (mov_softfloat64): Likewise. + Branch work171-tar, patch #200 Restrict SPR to appropriate integer modes.
[gcc(refs/users/meissner/heads/work171-tar)] Update ChangeLog.*
https://gcc.gnu.org/g:94e5c5ee7f41ea763c4c60cb6e82972a346ad699 commit 94e5c5ee7f41ea763c4c60cb6e82972a346ad699 Author: Michael Meissner Date: Wed Jul 3 19:18:06 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.tar | 71 +++ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar index 058a06886ea..692ad14da95 100644 --- a/gcc/ChangeLog.tar +++ b/gcc/ChangeLog.tar @@ -15,30 +15,61 @@ Remove SPR alternatives for move insns. Branch work171-tar, patch #200 -Restrict SPR to appropriate integer modes. +Add support for the TAR register. -In preparation for the patches to add support for the TAR register, I restricted -the modes that special purpose registers (SPRs) could hold to be appropriate -sized scalar integers. I have discovered occasionally when GCC has run out of -registers, it will use the SPRs to hold values instead of spilling them to the -stack. The LR/CTR registers can hold 8/16/32-bit values and on 64-bit systems, -they can also hold 64-bit values. The VRSAVE and VSCR registers can only hold -32-bit values. - -2024-06-20 Michael Meissner +2024-07-03 Michael Meissner gcc/ - * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Restrict - SPR registers to only hold scalar integer modes of an appropriate size. - * config/rs6000/rs6000.md (movcc_): Remove alternatives that move - values to/from the SPRs. - (movsf_hardfloat): Likewise. - (movsd_hardfloat): Likewise. - (mov_softfloat): Likewise. - (mov_softfloat32): Likewise. - (mov_hardfloat64): Likewise. - (*mov_softfloat64): Likewise. + * config/rs6000/constraints.md (h constraint): Add TAR register to the + documentation. + (wt constraint): New constraint. + * config/rs6000/rs6000-cpus.def (ISA_3_0_MASKS_SERVER): Add -mtar. + (POWERPC_MASKS): Likewise. + * config/rs6000/rs6000.cc (rs6000_reg_names): Add TAR register support. + (alt_reg_names): Likewise. 
+ (rs6000_hard_regno_mode_ok_uncached): Restrict SPR registers to only + hold scalar integer modes of an appropriate size. Add TAR register + support. + (rs6000_debug_reg_global): Print the register class that wt maps to. + (rs6000_init_hard_regno_mode_ok): Add TAR register support. + (rs6000_conditional_register_usage): Add TAR register support. + (print_operand): Likewise. + (rs6000_debugger_regno): Likewise. + (rs6000_opt_masks): Add support for -mtar. + * config/rs6000/rs6000.h (FIRST_PSEUDO_REGISTER): Add TAR register + support. + (FIXED_REGISTERS): Likewise. + (CALL_REALLY_USED_REGISTERS): Likewise. + (REG_ALLOC_ORDER): Likewise. + (enum reg_class): Likewise. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + (enum r6000_reg_class_enum): Add support for the wt constraint. + * config/rs6000/rs6000.md (TAR_REGNO): New constant. + (call_indirect_nonlocal_sysv): Likewise. + (call_value_indirect_nonlocal_sysv): Likewise. + (call_indirect_aix): Likewise. + (call_value_indirect_aix): Likewise. + (call_indirect_elfv2): Likewise. + (call_indirect_pcrel): Likewise. + (call_value_indirect_elfv2): Likewise. + (call_value_indirect_pcrel): Likewise. + (*sibcall_indirect_nonlocal_sysv): Likewise. + (sibcall_value_indirect_nonlocal_sysv): Likewise. + (indirect_jump): Likewise. + (@indirect_jump_nospec): Likewise. + (@tablejump_insn_normal): Likewise. + (@tablejump_insn_nospec): Likewise. + * config/rs6000/rs6000.opt (-mtar): New option. + +gcc/testsuite/ + + * gcc.target/powerpc/ppc-switch-1.c: Update test for the TAR register. + * gcc.target/powerpc/pr51513.c: Likewise. + * gcc.target/powerpc/safe-indirect-jump-2.c: Likewise. + * gcc.target/powerpc/safe-indirect-jump-3.c: Likewise. + * gcc.target/powerpc/tar-register.c: New test. Branch work171-tar, baseline
[gcc r15-1830] vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME
https://gcc.gnu.org/g:d1eeafe40f263acdb5eb1b57f777e064a11ced2b commit r15-1830-gd1eeafe40f263acdb5eb1b57f777e064a11ced2b Author: Hu, Lin1 Date: Wed Jul 3 10:07:02 2024 +0800 vect: Fix ICE caused by missing check for TREE_CODE == SSA_NAME Need to check if the tree's code is SSA_NAME before SSA_NAME_RANGE_INFO. 2024-07-03 Hu, Lin1 Andrew Pinski gcc/ChangeLog: PR tree-optimization/115753 * tree-vect-stmts.cc (supportable_indirect_convert_operation): Add TREE_CODE check before SSA_NAME_RANGE_INFO. gcc/testsuite/ChangeLog: PR tree-optimization/115753 * gcc.dg/vect/pr115753-1.c: New test. * gcc.dg/vect/pr115753-2.c: Ditto. * gcc.dg/vect/pr115753-3.c: Ditto. Diff: --- gcc/testsuite/gcc.dg/vect/pr115753-1.c | 12 gcc/testsuite/gcc.dg/vect/pr115753-2.c | 20 gcc/testsuite/gcc.dg/vect/pr115753-3.c | 15 +++ gcc/tree-vect-stmts.cc | 2 +- 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-1.c b/gcc/testsuite/gcc.dg/vect/pr115753-1.c new file mode 100644 index 000..2c1b6e5df63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ +/* { dg-add-options float16 } */ +/* { dg-require-effective-target float16 } */ + +void f(_Complex _Float16*); +void +foo1 (_Complex _Float16 *d) +{ +_Complex _Float16 cf = 3967 + 3791 * 1i; +f(&cf); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-2.c b/gcc/testsuite/gcc.dg/vect/pr115753-2.c new file mode 100644 index 000..ceacada2a76 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ +/* { dg-add-options float16 } */ +/* { dg-require-effective-target float16 } */ + +void f(_Float16*); +void +foo1 () +{ + int t0 = 3967; + int t1 = 3969; + int t2 = 3971; + int t3 = 3973; + _Float16 tt[4]; + tt[0] = t0; + tt[1] = t1; + tt[2] = t2; + tt[3] = t3; + f(&tt[0]); +} diff --git a/gcc/testsuite/gcc.dg/vect/pr115753-3.c 
b/gcc/testsuite/gcc.dg/vect/pr115753-3.c new file mode 100644 index 000..8e95445897c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115753-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -frounding-math" } */ + +void f(float*); +void +foo1 () +{ + long long t0 = __LONG_LONG_MAX__; + long long t1 = __LONG_LONG_MAX__ - 1; + float tt[2]; + tt[0] = t0; + tt[1] = t1; + f(&tt[0]); +} + diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 156c11fee82..fdcda0d2aba 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -14680,7 +14680,7 @@ supportable_indirect_convert_operation (code_helper code, In the future, if it is supported, changes may need to be made to this part, such as checking the RANGE of each element in the vector. */ - if (!SSA_NAME_RANGE_INFO (op0) + if ((TREE_CODE (op0) == SSA_NAME && !SSA_NAME_RANGE_INFO (op0)) || !vect_get_range_info (op0, &op_min_value, &op_max_value)) break;
[gcc r15-1831] c++: CTAD and trait built-ins
https://gcc.gnu.org/g:655fe94ae4c95d7f113c62787ca382d2742fad6f commit r15-1831-g655fe94ae4c95d7f113c62787ca382d2742fad6f Author: Jason Merrill Date: Wed Jul 3 17:27:48 2024 -0400 c++: CTAD and trait built-ins While poking at 101232 I noticed that we started trying to parse __is_invocable(_Fn, _Args...) as a functional cast to a CTAD placeholder type; we shouldn't consider CTAD for a template that shares a name (reserved for the implementation) with a built-in trait. gcc/cp/ChangeLog: * pt.cc (ctad_template_p): Return false for trait names. Diff: --- gcc/cp/pt.cc | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 017cc7fd0ab..d1316483e24 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -30811,6 +30811,11 @@ ctad_template_p (tree tmpl) where the nested-name-specifier (if any) is non-dependent and the template-name of the simple-template-id names a deducible template. */ + if (DECL_CLASS_TEMPLATE_P (tmpl) + && IDENTIFIER_TRAIT_P (DECL_NAME (tmpl))) +/* Don't consider CTAD for templates with the same name as a trait; that + is ambiguous with e.g. __is_invocable(_Fn,_Args...). */ +return false; if (DECL_CLASS_TEMPLATE_P (tmpl) || DECL_TEMPLATE_TEMPLATE_PARM_P (tmpl)) return true;
[gcc r15-1832] c++: OVERLOAD in diagnostics
https://gcc.gnu.org/g:baac8f710e35cfea14060e5eca49dbb49ffc294d commit r15-1832-gbaac8f710e35cfea14060e5eca49dbb49ffc294d Author: Jason Merrill Date: Wed Jul 3 17:25:53 2024 -0400 c++: OVERLOAD in diagnostics In modules we can get an OVERLOAD around a non-function, so let's tail recurse instead of falling through. As a result we start printing the template header in this testcase. gcc/cp/ChangeLog: * error.cc (dump_decl) [OVERLOAD]: Recurse on single case. gcc/testsuite/ChangeLog: * g++.dg/warn/pr61945.C: Adjust diagnostic. Diff: --- gcc/cp/error.cc | 6 ++ gcc/testsuite/g++.dg/warn/pr61945.C | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc index 171a352c85f..1f36563ae2c 100644 --- a/gcc/cp/error.cc +++ b/gcc/cp/error.cc @@ -1407,10 +1407,8 @@ dump_decl (cxx_pretty_printer *pp, tree t, int flags) break; } - /* If there's only one function, just treat it like an ordinary -FUNCTION_DECL. */ - t = OVL_FIRST (t); - /* Fall through. */ + /* If there's only one function, dump that. */ + return dump_decl (pp, OVL_FIRST (t), flags); case FUNCTION_DECL: if (! DECL_LANG_SPECIFIC (t)) diff --git a/gcc/testsuite/g++.dg/warn/pr61945.C b/gcc/testsuite/g++.dg/warn/pr61945.C index 3d40581e5e3..2252330835f 100644 --- a/gcc/testsuite/g++.dg/warn/pr61945.C +++ b/gcc/testsuite/g++.dg/warn/pr61945.C @@ -7,5 +7,5 @@ class A { }; class B : A { template - void foo (); // { dg-message "by .B::foo\\(\\)." } + void foo (); // { dg-message "by .*B::foo\\(\\)." } };
[gcc r15-1833] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue
https://gcc.gnu.org/g:8e72b1bb3896f6e8d4f4679cbcfbc2a8212d04f9 commit r15-1833-g8e72b1bb3896f6e8d4f4679cbcfbc2a8212d04f9 Author: Hongyu Wang Date: Wed Feb 7 14:42:58 2024 +0800 [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue According to the APX spec, pushp/popp pairs should be matched; otherwise the PPX hint cannot take effect, which causes a performance loss. In ix86_expand_epilogue, there are several optimizations that may cause the epilogue to use mov to restore the regs. Check whether PPX was applied and prevent usage of mov/leave in the epilogue. Also do not use PPX for eh_return. gcc/ChangeLog: * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return. (ix86_emit_save_regs): Emit ppx only when TARGET_APX_PPX && !crtl->calls_eh_return. (ix86_expand_epilogue): Don't restore reg using mov when apx_ppx_used flag is true. * config/i386/i386.h (struct machine_frame_state): Add apx_ppx_used flag. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ppx-2.c: New test. * gcc.target/i386/apx-ppx-3.c: Likewise. 
Diff: --- gcc/config/i386/i386.cc | 13 + gcc/config/i386/i386.h| 4 gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++ gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index bd7411190af..99def8d4a77 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void) { int regno; rtx_insn *insn; + bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return; if (!TARGET_APX_PUSH2POP2 || !ix86_can_use_push2pop2 () @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), - TARGET_APX_PPX)); + use_ppx)); RTX_FRAME_RELATED_P (insn) = 1; } } @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void) regno_list[0]), gen_rtx_REG (word_mode, regno_list[1]), -TARGET_APX_PPX)); +use_ppx)); RTX_FRAME_RELATED_P (insn) = 1; rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void) else { insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), - TARGET_APX_PPX)); + use_ppx)); RTX_FRAME_RELATED_P (insn) = 1; aligned = true; } @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void) { insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno_list[0]), - TARGET_APX_PPX)); + use_ppx)); RTX_FRAME_RELATED_P (insn) = 1; } } @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void) if (!frame.save_regs_using_mov) { ix86_emit_save_regs (); + m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return; int_registers_saved = true; gcc_assert (m->fs.sp_offset == frame.reg_save_offset); } @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style) /* SEH requires the use of pops to identify the epilogue. */ else if (TARGET_SEH) restore_regs_via_mov = false; + /* If we already save reg with pushp, don't use move at epilogue. 
*/ + else if (m->fs.apx_ppx_used) +restore_regs_via_mov = false; /* If we're only restoring one register and sp cannot be used then using a move instruction to restore the register since it's less work than reloading sp and popping the register. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 147b12cd014..0c5292e1d64 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state The flags realigned and sp_realigned are mutually exclusive. */ BOOL_BITFIELD sp_realigned : 1; + /* When APX_PPX used in prologue, force epilogue to emit + popp instead of move and leave. */ + BOOL_BITFIELD apx_ppx_used : 1; + /* If sp_realigned is set, this is the last valid offset from the CFA that can be used for acc
[gcc r15-1834] [committed] Fix newlib build failure with rx as well as several dozen testsuite failures
https://gcc.gnu.org/g:759f4abe1220a8202b8389f9b756c35b6c9c439d commit r15-1834-g759f4abe1220a8202b8389f9b756c35b6c9c439d Author: Jeff Law Date: Wed Jul 3 21:11:07 2024 -0600 [committed] Fix newlib build failure with rx as well as several dozen testsuite failures The rx port has been failing to build newlib for a bit over a week. I can't remember if it was the late-combine work or the IRA costing twiddle; regardless, the real bug is in the rx backend. Basically dwarf2cfi is blowing up because of inconsistent state caused by the failure to mark a stack adjustment as frame related. This instance in the epilogue looks like a simple goof. With the port building again, the testsuite would run and it showed a number of regressions, again related to CFI handling. The common thread was a failure to mark a copy from FP to SP in the prologue as frame related. The change which introduced this bug was supposed to just be changing promotions of vector types. It's unclear if Nick included the hunk accidentally or just goof'd on the logic. Regardless it looks quite incorrect. Reverting that hunk fixes the regressions *and* fixes 94 pre-existing failures. The net is rx-elf is regression free and has moved forward in terms of its testsuite status. Pushing to the trunk momentarily. gcc/ * config/rx/rx.cc (rx_expand_prologue): Mark the copy from FP to SP as frame related. (rx_expand_epilogue): Mark the stack pointer adjustment as frame related. Diff: --- gcc/config/rx/rx.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc index 8048cc98708..c84e1398aad 100644 --- a/gcc/config/rx/rx.cc +++ b/gcc/config/rx/rx.cc @@ -1845,8 +1845,7 @@ rx_expand_prologue (void) gen_safe_add (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (- (HOST_WIDE_INT) frame_size), true); else - gen_safe_add (stack_pointer_rtx, frame_pointer_rtx, NULL_RTX, - false /* False because the epilogue will use the FP not the SP. 
*/); + gen_safe_add (stack_pointer_rtx, frame_pointer_rtx, NULL_RTX, true); } } @@ -2119,7 +2118,7 @@ rx_expand_epilogue (bool is_sibcall) /* Cannot use the special instructions - deconstruct by hand. */ if (total_size) gen_safe_add (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (total_size), false); + GEN_INT (total_size), true); if (MUST_SAVE_ACC_REGISTER) {
[gcc(refs/users/meissner/heads/work171-bugs)] Do not build IEEE 128-bit libstdc++ support if VSX is not available.
https://gcc.gnu.org/g:8f06cd86e2d61b4ed3e14f43edac43c522260c1b commit 8f06cd86e2d61b4ed3e14f43edac43c522260c1b Author: Michael Meissner Date: Wed Jul 3 23:44:58 2024 -0400 Do not build IEEE 128-bit libstdc++ support if VSX is not available. 2024-07-03 Michael Meissner libstdc++-v3/ * configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on PowerPC systems without VSX. * configure: Regenerate. * numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems without VSX. Diff: --- libstdc++-v3/configure| 68 ++- libstdc++-v3/configure.ac | 58 -- libstdc++-v3/include/ext/numeric_traits.h | 2 +- 3 files changed, 86 insertions(+), 42 deletions(-) diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index 5645e991af7..daab67f9dee 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -51355,8 +51355,31 @@ $as_echo "#define _GLIBCXX_LONG_DOUBLE_COMPAT 1" >>confdefs.h case "$target" in powerpc*-*-linux*) LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute" -# Check for IEEE128 support in libm: -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 in -lm" >&5 + # Eliminate little endian systems without VSX + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + #ifndef __VSX__ + #error "IEEE 128-bit needs VSX" + #endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_ieee128_possible=yes +else + ac_ieee128_possible=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if test $ac_ieee128_possible = yes; then + # Check for IEEE128 support in libm: + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __frexpieee128 in -lm" >&5 $as_echo_n "checking for __frexpieee128 in -lm... 
" >&6; } if ${ac_cv_lib_m___frexpieee128+:} false; then : $as_echo_n "(cached) " >&6 @@ -51401,18 +51424,18 @@ else ac_ldbl_ieee128_in_libc=no fi -if test $ac_ldbl_ieee128_in_libc = yes; then - # Determine which long double format is the compiler's default: - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + if test $ac_ldbl_ieee128_in_libc = yes; then +# Determine which long double format is the compiler's default: +cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { -#ifndef __LONG_DOUBLE_IEEE128__ -#error compiler defaults to ibm128 -#endif + #ifndef __LONG_DOUBLE_IEEE128__ + #error compiler defaults to ibm128 + #endif ; return 0; @@ -51424,21 +51447,28 @@ else ac_ldbl_ieee128_default=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - # Library objects should use default long double format. - if test "$ac_ldbl_ieee128_default" = yes; then -LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute" -# Except for the ones that explicitly use these flags: -LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble -mno-gnu-attribute -Wno-psabi" - else -LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute" -LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ieeelongdouble -mno-gnu-attribute -Wno-psabi" - fi +# Library objects should use default long double format. 
+if test "$ac_ldbl_ieee128_default" = yes; then + LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute" + # Except for the ones that explicitly use these flags: + LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ibmlongdouble -mno-gnu-attribute -Wno-psabi" +else + LONG_DOUBLE_128_FLAGS="-mno-gnu-attribute" + LONG_DOUBLE_ALT128_COMPAT_FLAGS="-mabi=ieeelongdouble -mno-gnu-attribute -Wno-psabi" +fi $as_echo "#define _GLIBCXX_LONG_DOUBLE_ALT128_COMPAT 1" >>confdefs.h - port_specific_symbol_files="$port_specific_symbol_files \$(top_srcdir)/config/os/gnu-linux/ldbl-ieee128-extra.ver" - ac_ldbl_alt128_compat=yes +port_specific_symbol_files="$port_specific_symbol_files \$(top_srcdir)/config/os/gnu-linux/ldbl-ieee128-extra.ver" +ac_ldbl_alt128_compat=yes + else +ac_ldbl_alt128_compat=no + fi + +# IEEE 128-bit not possible else + acl_ldbl_ieee128_in_libc=no + acl_ldbl_ieee128_default=no ac_ldbl_alt128_compat=no fi ;; diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac index ccb24a82be7..69f795bd453 100644 --- a/libstdc++-v3/configure.ac +++ b/libstdc++-v3/configure.ac @@ -479,31 +479,45 @@ case "$target" in case "$target" in powerpc*-*-linux*)
[gcc(refs/users/meissner/heads/work171-bugs)] Do not build IEEE 128-bit libgfortran support if VSX is not available.
https://gcc.gnu.org/g:e405c7583045f798f26b80432bdf418514efcbaa commit e405c7583045f798f26b80432bdf418514efcbaa Author: Michael Meissner Date: Wed Jul 3 23:42:52 2024 -0400 Do not build IEEE 128-bit libgfortran support if VSX is not available. 2024-07-03 Michael Meissner libgfortran/ * configure.ac (powerpc64le*-linux*): Check to see that the compiler uses VSX before enabling IEEE 128-bit support. * configure: Regenerate. * kinds-override.h (GFC_REAL_17): Add check for __VSX__. * libgfortran.h (POWER_IEEE128): Likewise. Diff: --- libgfortran/configure| 7 +-- libgfortran/configure.ac | 3 +++ libgfortran/kinds-override.h | 2 +- libgfortran/libgfortran.h| 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/libgfortran/configure b/libgfortran/configure index 11a1bc5f070..2708e5c7eca 100755 --- a/libgfortran/configure +++ b/libgfortran/configure @@ -5981,6 +5981,9 @@ if test "x$GCC" = "xyes"; then #if __SIZEOF_LONG_DOUBLE__ != 16 #error long double is double #endif + #if !defined(__VSX__) + #error VSX is not available + #endif int main () { @@ -12847,7 +12850,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12850 "configure" +#line 12853 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12953,7 +12956,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12956 "configure" +#line 12959 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac index cca1ea0ea97..cfaeb9717ab 100644 --- a/libgfortran/configure.ac +++ b/libgfortran/configure.ac @@ -148,6 +148,9 @@ if test "x$GCC" = "xyes"; then AC_PREPROC_IFELSE( [AC_LANG_PROGRAM([[#if __SIZEOF_LONG_DOUBLE__ != 16 #error long double is double + #endif + #if !defined(__VSX__) + #error VSX is not available #endif]], [[(void) 0;]])], [AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble -mno-gnu-attribute"; diff 
--git a/libgfortran/kinds-override.h b/libgfortran/kinds-override.h index f6b4956c5ca..51f440e5323 100644 --- a/libgfortran/kinds-override.h +++ b/libgfortran/kinds-override.h @@ -30,7 +30,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #endif /* Keep these conditions on one line so grep can filter it out. */ -#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_DOUBLE__ == 16 +#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG_DOUBLE__ == 16 && defined(__VSX__) typedef _Float128 GFC_REAL_17; typedef _Complex _Float128 GFC_COMPLEX_17; #define HAVE_GFC_REAL_17 diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h index 5c59ec26e16..23660335243 100644 --- a/libgfortran/libgfortran.h +++ b/libgfortran/libgfortran.h @@ -104,7 +104,7 @@ typedef off_t gfc_offset; #endif #if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \ -&& defined __GLIBC_PREREQ +&& defined __GLIBC_PREREQ && defined(__VSX__) #if __GLIBC_PREREQ (2, 32) #define POWER_IEEE128 1 #endif
[gcc(refs/users/meissner/heads/work171-bugs)] Update ChangeLog.*
https://gcc.gnu.org/g:24ecb864d6c40f84d20420c105f6b36e534285f1 commit 24ecb864d6c40f84d20420c105f6b36e534285f1 Author: Michael Meissner Date: Wed Jul 3 23:47:19 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.bugs | 38 +- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.bugs b/gcc/ChangeLog.bugs index d2c5d2ab118..1b4d99c0e5f 100644 --- a/gcc/ChangeLog.bugs +++ b/gcc/ChangeLog.bugs @@ -1,6 +1,42 @@ + Branch work171-bugs, patch #301 + +Do not build IEEE 128-bit libstdc++ support if VSX is not available. + +2024-07-03 Michael Meissner + +libstdc++-v3/ + + * configure.ac (powerpc*-*-linux*): Don't enable IEEE 128-bit on PowerPC + systems without VSX. + * configure: Regenerate. + * numeric_traits.h: Don't enable IEEE 128-bit on PowerPC systems without + VSX. + + Branch work171-bugs, patch #300 + +Do not build IEEE 128-bit libgfortran support if VSX is not available. + +2024-07-03 Michael Meissner + +libgfortran/ + + * configure.ac (powerpc64le*-linux*): Check to see that the compiler + uses VSX before enabling IEEE 128-bit support. + * configure: Regenerate. + * kinds-override.h (GFC_REAL_17): Add check for __VSX__. + * libgfortran.h (POWER_IEEE128): Likewise. + Branch work171-bugs, baseline +Add ChangeLog.bugs and update REVISION. + +2024-06-28 Michael Meissner + +gcc/ + + * ChangeLog.bugs: New file for branch. + * REVISION: Update. + 2024-06-28 Michael Meissner Clone branch -
[gcc r15-1835] i386: Add additional variant of bswaphisi2_lowpart peephole2.
https://gcc.gnu.org/g:727f8b142b7d5442af6c2e903293abc367a8de5f commit r15-1835-g727f8b142b7d5442af6c2e903293abc367a8de5f Author: Roger Sayle Date: Thu Jul 4 07:31:17 2024 +0100 i386: Add additional variant of bswaphisi2_lowpart peephole2. This patch adds an additional variation of the peephole2 used to convert bswaphisi2_lowpart into rotlhi3_1_slp, which converts xchgb %ah,%al into rotw if the flags register isn't live. The motivating example is: void ext(int x); void foo(int x) { ext((x&~0xffff)|((x>>8)&0xff)|((x&0xff)<<8)); } where GCC with -O2 currently produces: foo: movl %edi, %eax rolw $8, %ax movl %eax, %edi jmp ext The issue is that the original xchgb (bswaphisi2_lowpart) can only be performed in "Q" registers that allow the %?h register to be used, so reload generates the above two movl. However, it's later in peephole2 where we see that CC_FLAGS can be clobbered, so we can use a rotate word, which is more forgiving with register allocations. With the additional peephole2 proposed here, we now generate: foo: rolw $8, %di jmp ext 2024-07-04 Roger Sayle gcc/ChangeLog * config/i386/i386.md (bswaphisi2_lowpart peephole2): New peephole2 variant to eliminate register shuffling. gcc/testsuite/ChangeLog * gcc.target/i386/xchg-4.c: New test case. Diff: --- gcc/config/i386/i386.md| 24 gcc/testsuite/gcc.target/i386/xchg-4.c | 11 +++ 2 files changed, 35 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4a44b69b5fc..b24c4fe5875 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -21489,6 +21489,30 @@ (clobber (reg:CC FLAGS_REG))])] "operands[0] = gen_lowpart (HImode, operands[0]);") +;; Variant of above peephole2 to improve register allocation. 
+(define_peephole2 + [(set (match_operand:SI 0 "general_reg_operand") +(match_operand:SI 1 "register_operand")) + (set (match_dup 0) + (ior:SI (and:SI (match_dup 0) + (const_int -65536)) + (lshiftrt:SI (bswap:SI (match_dup 0)) +(const_int 16 + (set (match_operand:SI 2 "general_reg_operand") (match_dup 0))] + "!(TARGET_USE_XCHGB || + TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && peep2_regno_dead_p (0, FLAGS_REG) + && peep2_reg_dead_p(3, operands[0])" + [(parallel +[(set (strict_low_part (match_dup 3)) + (rotate:HI (match_dup 3) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[1], operands[2])) +emit_move_insn (operands[2], operands[1]); + operands[3] = gen_lowpart (HImode, operands[2]); +}) + (define_expand "paritydi2" [(set (match_operand:DI 0 "register_operand") (parity:DI (match_operand:DI 1 "register_operand")))] diff --git a/gcc/testsuite/gcc.target/i386/xchg-4.c b/gcc/testsuite/gcc.target/i386/xchg-4.c new file mode 100644 index 000..de099e79f5d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/xchg-4.c @@ -0,0 +1,11 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +void ext(int x); +void foo(int x) +{ +ext((x&~0x)|((x>>8)&0xff)|((x&0xff)<<8)); +} + +/* { dg-final { scan-assembler "rolw" } } */ +/* { dg-final { scan-assembler-not "mov" } } */
[gcc r15-1836] Use __builtin_cpu_supports instead of __get_cpuid_count.
https://gcc.gnu.org/g:699087a16591adfdf21228876b6c48dbcd353faa commit r15-1836-g699087a16591adfdf21228876b6c48dbcd353faa Author: liuhongt Date: Thu Jul 4 13:57:32 2024 +0800 Use __builtin_cpu_support instead of __get_cpuid_count. gcc/testsuite/ChangeLog: PR target/115748 * gcc.target/i386/avx512-check.h: Use __builtin_cpu_support instead of __get_cpuid_count. Diff: --- gcc/testsuite/gcc.target/i386/avx512-check.h | 46 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 71858a33dac..8ec1a7ccbae 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -38,69 +38,63 @@ __attribute__((noipa,target("no-avx"))) int avx512_runtime_support_p () { - unsigned int eax, ebx, ecx, edx; - - if (!__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)) -return 0; - /* Run AVX512 test only if host has ISA support. */ - if (check_osxsave () - && (ebx & bit_AVX512F) + if (__builtin_cpu_supports ("avx512f") #ifdef AVX512VL - && (ebx & bit_AVX512VL) + && __builtin_cpu_supports ("avx512vl") #endif #ifdef AVX512ER - && (ebx & bit_AVX512ER) + && __builtin_cpu_supports ("avx512er") #endif #ifdef AVX512CD - && (ebx & bit_AVX512CD) + && __builtin_cpu_supports ("avx512cd") #endif #ifdef AVX512DQ - && (ebx & bit_AVX512DQ) + && __builtin_cpu_supports ("avx512dq") #endif #ifdef AVX512BW - && (ebx & bit_AVX512BW) + && __builtin_cpu_supports ("avx512bw") #endif #ifdef AVX512IFMA - && (ebx & bit_AVX512IFMA) + && __builtin_cpu_supports ("avx512ifma") #endif #ifdef AVX512VBMI - && (ecx & bit_AVX512VBMI) + && __builtin_cpu_supports ("avx512vbmi") #endif #ifdef AVX5124FMAPS - && (edx & bit_AVX5124FMAPS) + && __builtin_cpu_supports ("avx5124fmaps") #endif #ifdef AVX5124VNNIW - && (edx & bit_AVX5124VNNIW) + && __builtin_cpu_supports ("avx5124vnniw") #endif #ifdef AVX512VPOPCNTDQ - && (ecx & bit_AVX512VPOPCNTDQ) + && __builtin_cpu_supports 
("avx512vpopcntdq") #endif #ifdef AVX512BITALG - && (ecx & bit_AVX512BITALG) + && __builtin_cpu_supports ("avx512bitalg") #endif #ifdef GFNI - && (ecx & bit_GFNI) + && __builtin_cpu_supports ("gfni") #endif #ifdef AVX512VBMI2 - && (ecx & bit_AVX512VBMI2) + && __builtin_cpu_supports ("avx512vbmi2") #endif #ifdef AVX512VNNI - && (ecx & bit_AVX512VNNI) + && __builtin_cpu_supports ("avx512vnni") #endif #ifdef AVX512FP16 - && (edx & bit_AVX512FP16) + && __builtin_cpu_supports ("avx512fp16") #endif #ifdef VAES - && (ecx & bit_VAES) + && __builtin_cpu_supports ("vaes") #endif #ifdef VPCLMULQDQ - && (ecx & bit_VPCLMULQDQ) + && __builtin_cpu_supports ("vpclmulqdq") #endif #ifdef AVX512VP2INTERSECT - && (edx & bit_AVX512VP2INTERSECT) + && __builtin_cpu_supports ("avx512vp2intersect") #endif - && avx512f_os_support ()) + ) { return 1; }
[gcc r15-1837] middle-end/115426 - wrong gimplification of "rm" asm output operand
https://gcc.gnu.org/g:a4bbdec2be1c9f8fb49276b8a54ee86024ceac17 commit r15-1837-ga4bbdec2be1c9f8fb49276b8a54ee86024ceac17 Author: Richard Biener Date: Tue Jun 11 13:11:08 2024 +0200 middle-end/115426 - wrong gimplification of "rm" asm output operand When the operand is gimplified to an extract of a register or a register we have to disallow memory as we otherwise fail to gimplify it properly. Instead of __asm__("" : "=rm" __imag <r>); we want __asm__("" : "=rm" D.2772); _1 = REALPART_EXPR <r>; r = COMPLEX_EXPR <_1, D.2772>; otherwise SSA rewrite will fail and generate wrong code with 'r' left bare in the asm output. PR middle-end/115426 * gimplify.cc (gimplify_asm_expr): Handle "rm" output constraint gimplified to a register (operation). * gcc.dg/pr115426.c: New testcase. Diff: --- gcc/gimplify.cc | 8 gcc/testsuite/gcc.dg/pr115426.c | 14 ++ 2 files changed, 22 insertions(+) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 622c51d5c3f..5a9627c4acf 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -7040,6 +7040,14 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p) ret = tret; } + /* If the gimplified operand is a register we do not allow memory. */ + if (allows_reg + && allows_mem + && (is_gimple_reg (TREE_VALUE (link)) + || (handled_component_p (TREE_VALUE (link)) + && is_gimple_reg (TREE_OPERAND (TREE_VALUE (link), 0) + allows_mem = 0; + /* If the constraint does not allow memory make sure we gimplify it to a register if it is not already but its base is. This happens for complex and vector components. */ diff --git a/gcc/testsuite/gcc.dg/pr115426.c b/gcc/testsuite/gcc.dg/pr115426.c new file mode 100644 index 000..02bfc3f21fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115426.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-std=gnu11" } */ + +_Complex int fcs (_Complex int r) +{ + __asm__("" : "=rm" (__imag__ r)); + return r; +} + +_Complex int fcs2 (_Complex int r) +{ + __asm__("" : "=m" (__imag__ r)); + return r; +}