[gcc r15-4530] RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:f138806811968a99bd81d7a60746279877df7ee8 commit r15-4530-gf138806811968a99bd81d7a60746279877df7ee8 Author: Pan Li Date: Mon Oct 14 15:10:46 2024 +0800 RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC Form 7: #define DEF_VEC_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_7 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN > x || x >= (WT)NT_MAX \ ? x < 0 ? NT_MIN : NT_MAX \ : trunc; \ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-7-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-7-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-7-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c new file mode 100644 index ..a6eb2d5b0b2f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_7(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c new file mode 100644 index ..fd01c74d2df9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_7(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4531] RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:cb131a401b7489cc17e2d70420cf9a916515b3f6 commit r15-4531-gcb131a401b7489cc17e2d70420cf9a916515b3f6 Author: Pan Li Date: Mon Oct 14 15:23:57 2024 +0800 RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC Form 8: #define DEF_VEC_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_8 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN >= x || x >= (WT)NT_MAX \ ? x < 0 ? NT_MIN : NT_MAX \ : trunc; \ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-8-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-8-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-8-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c new file mode 100644 index ..64f140f764e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_8(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c new file mode 100644 index ..9bd95a52a012 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_8(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc(refs/users/meissner/heads/work181-sha)] Move xxeval case to be first.
https://gcc.gnu.org/g:cc231f8c1f7b0085c9c5b56435801ae6066ab815 commit cc231f8c1f7b0085c9c5b56435801ae6066ab815 Author: Michael Meissner Date: Mon Oct 21 12:23:03 2024 -0400 Move xxeval case to be first. 2024-10-21 Michael Meissner gcc/ * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case to be first. * config/rs6000/fusion.md: Regenerate. Diff: --- gcc/config/rs6000/fusion.md| 352 - gcc/config/rs6000/genfusion.pl | 4 +- 2 files changed, 178 insertions(+), 178 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 215a3aae074f..6f9081ab3372 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1872,16 +1872,16 @@ ;; vector vand -> vand (define_insn "*fuse_vand_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") - (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) - (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) +(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v") + (match_operand:VM 1 "vector_fusion_operand" "%wa,v,v,v,v")) + (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ + xxeval %x3,%x2,%x1,%x0,1 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 - xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1893,16 +1893,16 @@ ;; vector vandc -> vand (define_insn "*fuse_vandc_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) - (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) - (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) +(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v")) + (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")) + (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ + xxeval %x3,%x2,%x1,%x0,2 vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 - xxeval %x3,%x2,%x1,%x0,2 vandc %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1914,16 +1914,16 @@ ;; vector veqv -> vand (define_insn "*fuse_veqv_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") - (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) - (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) +(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v") + (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v"))) + (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ + xxeval %x3,%x2,%x1,%x0,9 veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 - xxeval %x3,%x2,%x1,%x0,9 veqv %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1935,16 +1935,16 @@ ;; vector vnand -> vand (define_insn "*fuse_vnand_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) - (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) - (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) +(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v"))) + (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ + xxeval %x3,%x2,%x1,%x0,14 vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand 
%3,%3,%2 - xxeval %x3,%x2,%x1,%x0,14 vnand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1956,16 +1956,16 @@ ;; vector vnor -> vand (define_insn "*fuse_vnor_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (not:
[gcc(refs/users/meissner/heads/work181-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:b19edef8b8eb4dd28774a5b23feb7cd1703a49e0 commit b19edef8b8eb4dd28774a5b23feb7cd1703a49e0 Author: Michael Meissner Date: Mon Oct 21 12:23:44 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index 2d7f998a8b52..403a46031b20 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,4 +1,16 @@ - Branch work181-sha, patch #403 + Branch work181-sha, patch #405 + +Move xxeval case to be first. + +2024-10-21 Michael Meissner + +gcc/ + + * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case + to be first. + * config/rs6000/fusion.md: Regenerate. + + Branch work181-sha, patch #404 Move xxeval case before alternative that needs a temporary register.
[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: add callback flag to some checks
https://gcc.gnu.org/g:cf15a12be9efe68841746d0ab189e3846499498f commit cf15a12be9efe68841746d0ab189e3846499498f Author: Josef Melcr Date: Mon Oct 21 18:04:21 2024 +0200 omp-cp: add callback flag to some checks gcc/ChangeLog: * cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee): return if callback flag is set (cgraph_node::verify_node): allow some weirdness if callback is set Signed-off-by: Josef Melcr Diff: --- gcc/cgraph.cc | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index c62f5de807da..96d95a87c769 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -1514,9 +1514,7 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e, } } - - if (e->indirect_unknown_callee - || decl == e->callee->decl) + if (e->indirect_unknown_callee || decl == e->callee->decl || e->callback) return e->call_stmt; if (decl && ipa_saved_clone_sources) @@ -3682,6 +3680,7 @@ cgraph_node::verify_node (void) if (gimple_has_body_p (e->caller->decl) && !e->caller->inlined_to && !e->speculative + && !e->callback /* Optimized out calls are redirected to __builtin_unreachable. */ && (e->count.nonzero_p () || ! e->callee->decl @@ -3929,7 +3928,7 @@ cgraph_node::verify_node (void) for (e = callees; e; e = e->next_callee) { - if (!e->aux && !e->speculative) + if (!e->aux && !e->speculative && !e->callback) { error ("edge %s->%s has no corresponding call_stmt", identifier_to_locale (e->caller->name ()),
[gcc(refs/users/meissner/heads/work181-sha)] Update ChangeLog.*
https://gcc.gnu.org/g:82ef5cf3c220dfcd7c3d76d40658a57eb8b94bcc commit 82ef5cf3c220dfcd7c3d76d40658a57eb8b94bcc Author: Michael Meissner Date: Mon Oct 21 13:11:21 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.sha | 12 +--- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha index 403a46031b20..515e0390089a 100644 --- a/gcc/ChangeLog.sha +++ b/gcc/ChangeLog.sha @@ -1,14 +1,4 @@ - Branch work181-sha, patch #405 - -Move xxeval case to be first. - -2024-10-21 Michael Meissner - -gcc/ - - * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case - to be first. - * config/rs6000/fusion.md: Regenerate. + Branch work181-sha, patch #405 was reverted Branch work181-sha, patch #404
[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: resolve segfault through monkey stick debugging
https://gcc.gnu.org/g:397b6d3ea75e4f16c7da848c55f7823480c662c8 commit 397b6d3ea75e4f16c7da848c55f7823480c662c8 Author: Josef Melcr Date: Fri Oct 18 23:41:37 2024 +0200 omp-cp: resolve segfault through monkey stick debugging gcc/ChangeLog: * cgraph.h: modify functions regarding speculative call edges * ipa-fnsummary.cc (analyze_function_body): integrate callback edges (compute_fn_summary): integrate callback edges in similar fashion to speculative edges Signed-off-by: Josef Melcr Diff: --- gcc/cgraph.h | 24 +--- gcc/ipa-fnsummary.cc | 9 - 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index bf0a22bcf365..5f1faa4c56ad 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1769,6 +1769,10 @@ public: target2. */ cgraph_edge *next_speculative_call_target () { +if (callback) + { + return NULL; + } cgraph_edge *e = this; gcc_checking_assert (speculative && callee); @@ -1783,15 +1787,29 @@ public: indirect call edge in the speculative call sequence. */ cgraph_edge *speculative_call_indirect_edge () { -gcc_checking_assert (speculative); +gcc_checking_assert (speculative || callback); if (!callee) return this; -for (cgraph_edge *e2 = caller->indirect_calls; -true; e2 = e2->next_callee) + +cgraph_edge * e2 = NULL; +for (e2 = caller->indirect_calls; +e2; e2 = e2->next_callee) if (e2->speculative && call_stmt == e2->call_stmt && lto_stmt_uid == e2->lto_stmt_uid) return e2; + +if (!e2 && callback) + { + for (e2 = caller->callees; e2; e2 = e2->next_callee) + { + if (e2->has_callback && call_stmt == e2->call_stmt) + { + return e2; + } + } + } + gcc_unreachable(); } /* When called on any edge in speculative call and when given any target diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index b38247834065..7858684aaa25 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -2900,7 +2900,7 @@ analyze_function_body (struct cgraph_node *node, bool early) es->call_stmt_time = this_time; es->loop_depth = bb_loop_depth (bb); 
edge_set_predicate (edge, &bb_predicate); - if (edge->speculative) + if (edge->speculative || edge->callback) { cgraph_edge *indirect = edge->speculative_call_indirect_edge (); @@ -3309,6 +3309,13 @@ compute_fn_summary (struct cgraph_node *node, bool early) for (e = node->indirect_calls; e; e = e->next_callee) if (e->speculative) break; + + if (!e) + { + for (e = node->callees; e; e = e->next_callee) + if (e->callback) + break; + } gcc_assert (e || size_info->size == size_info->self_size); } }
[gcc r15-4532] [committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in ext-dce
https://gcc.gnu.org/g:36e91df7716d34aa5694533837551593ec28f22b commit r15-4532-g36e91df7716d34aa5694533837551593ec28f22b Author: Jeff Law Date: Mon Oct 21 13:37:21 2024 -0600 [committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in ext-dce A while back I noticed that the code to call carry_backpropagate was being called after the optimization step. Which seemed wrong, but at the time I didn't have a testcase showing it as a problem. Now I have 4 :-) The way things used to work, the extension would be stripped away before calling carry_backpropagate, meaning carry_backpropagate would never see a SIGN_EXTEND. Thus the code trying to account for the sign extended bit was never reached. Getting that bit marked live is what's needed to fix these testcases. Fallout is minor with just an adjustment needed to sensibly deal with vector modes in a place where we didn't have them before. I'm still somewhat concerned about this code. Specifically whether or not we can get in here with arbitrarily complex RTL, and if so do we need to recurse down and look at those sub-expressions. So while this patch fixes the most pressing issue, I wouldn't be terribly surprised if we're back inside this code at some point. Bootstrapped and regression tested on x86_64, ppc64le, riscv64, s390x, mips64, loongarch, aarch64, m68k, alpha, hppa, sh4, sh4eb, perhaps something else that I've forgotten... Also tested on all the crosses in my tester. PR rtl-optimization/116488 PR rtl-optimization/116579 PR rtl-optimization/116915 PR rtl-optimization/117226 gcc/ * ext-dce.cc (carry_backpropagate): Properly handle SIGN_EXTEND, add ZERO_EXTEND handling as well. (ext_dce_process_uses): Call carry_backpropagate before the optimization step. gcc/testsuite/ * gcc.dg/torture/pr116488.c: New test. * gcc.dg/torture/pr116579.c: New test. * gcc.dg/torture/pr116915.c: New test. * gcc.dg/torture/pr117226.c: New test. 
Diff: --- gcc/ext-dce.cc | 34 +++-- gcc/testsuite/gcc.dg/torture/pr116488.c | 20 +++ gcc/testsuite/gcc.dg/torture/pr116579.c | 18 + gcc/testsuite/gcc.dg/torture/pr116915.c | 15 +++ gcc/testsuite/gcc.dg/torture/pr117226.c | 17 + 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 2f3514ae7976..a449b9f6b49c 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -478,7 +478,12 @@ binop_implies_op2_fully_live (rtx_code code) holds true, and bits set in MASK are live in the result. Compute a mask of (potentially) live bits in the non-constant inputs. In case of binop_implies_op2_fully_live (e.g. shifts), the computed mask may - exclusively pertain to the first operand. */ + exclusively pertain to the first operand. + + This looks wrong as we may have some important operations embedded as + operands of another operation. For example, we might have an extension + wrapping a shift. It really feels like this needs to be recursing down + into operands much more often. */ unsigned HOST_WIDE_INT carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x) @@ -557,9 +562,26 @@ carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x) return mmask; case SIGN_EXTEND: - if (mask & ~GET_MODE_MASK (GET_MODE_INNER (GET_MODE (XEXP (x, 0) + if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant () + || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ()) + return -1; + + /* We want the mode of the inner object. We need to ensure its +sign bit is on in MASK. */ + mode = GET_MODE (XEXP (x, 0)); + if (mask & ~GET_MODE_MASK (GET_MODE_INNER (mode))) mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1); - return mask; + + /* Recurse into the operand. 
*/ + return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0)); + +case ZERO_EXTEND: + if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant () + || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ()) + return -1; + + /* Recurse into the operand. */ + return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0)); /* We propagate for the shifted operand, but not the shift count. The count is handled specially. */ @@ -670,6 +692,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, if (skipped_dest) dst_mask = -1; + dst_mask = carry_backpropagate (dst_mask, code, src); + /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT of the source specially to im
[gcc r15-4514] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED
https://gcc.gnu.org/g:1a4c5643a5911d130dfab9a064222baeeb7f9be7 commit r15-4514-g1a4c5643a5911d130dfab9a064222baeeb7f9be7 Author: Jeevitha Date: Thu Oct 10 14:42:45 2024 -0500 rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED Corrected the function code for the Atomic Memory Operation "Fetch and Decrement Bounded", changing it from 0x1A to 0x1C. 2024-10-11 Jeevitha Palanisamy gcc/ * config/rs6000/amo.h (enum _AMO_LD): Correct the function code for _AMO_LD_DEC_BOUNDED. Diff: --- gcc/config/rs6000/amo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h index 6b9e4e088b97..1303c9d9dab2 100644 --- a/gcc/config/rs6000/amo.h +++ b/gcc/config/rs6000/amo.h @@ -46,7 +46,7 @@ enum _AMO_LD { _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal. */ _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */ _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal. */ - _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */ + _AMO_LD_DEC_BOUNDED = 0x1C /* Fetch and Decrement Bounded. */ }; /* Implementation of the simple LWAT/LDAT operations that take one register and
[gcc r12-10779] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED
https://gcc.gnu.org/g:41377d0f4e791bcdd848e11eac172b8e81ecb6ec commit r12-10779-g41377d0f4e791bcdd848e11eac172b8e81ecb6ec Author: Jeevitha Date: Mon Oct 21 04:01:46 2024 -0500 rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED Corrected the function code for the Atomic Memory Operation "Fetch and Decrement Bounded", changing it from 0x1A to 0x1C. 2024-10-11 Jeevitha Palanisamy gcc/ * config/rs6000/amo.h (enum _AMO_LD): Correct the function code for _AMO_LD_DEC_BOUNDED. (cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7) Diff: --- gcc/config/rs6000/amo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h index ea4668e0547f..47d19ee181c2 100644 --- a/gcc/config/rs6000/amo.h +++ b/gcc/config/rs6000/amo.h @@ -46,7 +46,7 @@ enum _AMO_LD { _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal. */ _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */ _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal. */ - _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */ + _AMO_LD_DEC_BOUNDED = 0x1C /* Fetch and Decrement Bounded. */ }; /* Implementation of the simple LWAT/LDAT operations that take one register and
[gcc r13-9140] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED
https://gcc.gnu.org/g:5be7a44c7a7f86dc2fe82dafcb76603a718dedbc commit r13-9140-g5be7a44c7a7f86dc2fe82dafcb76603a718dedbc Author: Jeevitha Date: Mon Oct 21 03:58:28 2024 -0500 rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED Corrected the function code for the Atomic Memory Operation "Fetch and Decrement Bounded", changing it from 0x1A to 0x1C. 2024-10-11 Jeevitha Palanisamy gcc/ * config/rs6000/amo.h (enum _AMO_LD): Correct the function code for _AMO_LD_DEC_BOUNDED. (cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7) Diff: --- gcc/config/rs6000/amo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h index fa31bef9e935..e03fd7c71bb8 100644 --- a/gcc/config/rs6000/amo.h +++ b/gcc/config/rs6000/amo.h @@ -46,7 +46,7 @@ enum _AMO_LD { _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal. */ _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */ _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal. */ - _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */ + _AMO_LD_DEC_BOUNDED = 0x1C /* Fetch and Decrement Bounded. */ }; /* Implementation of the simple LWAT/LDAT operations that take one register and
[gcc r14-10808] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED
https://gcc.gnu.org/g:17f1277d78c51d64a222ade218796837f9153f42 commit r14-10808-g17f1277d78c51d64a222ade218796837f9153f42 Author: Jeevitha Date: Mon Oct 21 03:54:03 2024 -0500 rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED Corrected the function code for the Atomic Memory Operation "Fetch and Decrement Bounded", changing it from 0x1A to 0x1C. 2024-10-11 Jeevitha Palanisamy gcc/ * config/rs6000/amo.h (enum _AMO_LD): Correct the function code for _AMO_LD_DEC_BOUNDED. (cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7) Diff: --- gcc/config/rs6000/amo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h index 6b9e4e088b97..1303c9d9dab2 100644 --- a/gcc/config/rs6000/amo.h +++ b/gcc/config/rs6000/amo.h @@ -46,7 +46,7 @@ enum _AMO_LD { _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal. */ _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */ _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal. */ - _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */ + _AMO_LD_DEC_BOUNDED = 0x1C /* Fetch and Decrement Bounded. */ }; /* Implementation of the simple LWAT/LDAT operations that take one register and
[gcc r14-10809] middle-end/115110 - Fix view_converted_memref_p
https://gcc.gnu.org/g:2ac6159f8b5119e75a19f70f3c4578895f59cb53 commit r14-10809-g2ac6159f8b5119e75a19f70f3c4578895f59cb53 Author: Richard Biener Date: Fri May 17 11:02:29 2024 +0200 middle-end/115110 - Fix view_converted_memref_p view_converted_memref_p was checking the reference type against the pointer type of the offset operand rather than its pointed-to type which leads to all refs being subject to view-convert treatment in get_alias_set causing numerous testsuite fails but with its new uses from r15-512-g9b7cad5884f21c is also a wrong-code issue. PR middle-end/115110 * tree-ssa-alias.cc (view_converted_memref_p): Fix. (cherry picked from commit a5b3721c06646bf5b9b50a22964e8e2bd4d03f5f) Diff: --- gcc/tree-ssa-alias.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc index e7c1c1aa6243..72af21c02131 100644 --- a/gcc/tree-ssa-alias.cc +++ b/gcc/tree-ssa-alias.cc @@ -2049,8 +2049,9 @@ view_converted_memref_p (tree base) { if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF) return false; - return same_type_for_tbaa (TREE_TYPE (base), -TREE_TYPE (TREE_OPERAND (base, 1))) != 1; + return (same_type_for_tbaa (TREE_TYPE (base), + TREE_TYPE (TREE_TYPE (TREE_OPERAND (base, 1 + != 1); } /* Return true if an indirect reference based on *PTR1 constrained
[gcc r14-10812] tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR
https://gcc.gnu.org/g:a4744558b6a1d0a1c203acc827b6ad0cfe039212 commit r14-10812-ga4744558b6a1d0a1c203acc827b6ad0cfe039212 Author: Richard Biener Date: Sun Oct 13 12:44:04 2024 +0200 tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR When we remove unused BLOCKs we fail to clean references to them from DECL_VALUE_EXPRs of variables in other BLOCKs which in the PR causes LTO streaming to walk into pointers to GGC freed blocks. There's the question of whether such DECL_VALUE_EXPRs should keep variables and blocks referenced live (it doesn't seem to do that) and whether such DECL_VALUE_EXPRs should have survived in the first place. PR tree-optimization/116907 * tree-ssa-live.cc (clear_unused_block_pointer_in_block): New helper. (clear_unused_block_pointer): Call it. (cherry picked from commit 7d15248d41dc45a4ba2d38ff532b672a5c0651d0) Diff: --- gcc/tree-ssa-live.cc | 20 1 file changed, 20 insertions(+) diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc index 122d8e245dd0..8b559f2dbd85 100644 --- a/gcc/tree-ssa-live.cc +++ b/gcc/tree-ssa-live.cc @@ -609,6 +609,22 @@ clear_unused_block_pointer_1 (tree *tp, int *, void *) return NULL_TREE; } +/* Clear references to unused BLOCKs from DECL_VALUE_EXPRs of variables + in BLOCK. */ + +static void +clear_unused_block_pointer_in_block (tree block) +{ + for (tree t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) +if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) + { + tree val = DECL_VALUE_EXPR (t); + walk_tree (&val, clear_unused_block_pointer_1, NULL, NULL); + } + for (tree t = BLOCK_SUBBLOCKS (block); t; t = BLOCK_CHAIN (t)) +clear_unused_block_pointer_in_block (t); +} + /* Set all block pointer in debug or clobber stmt to NULL if the block is unused, so that they will not be streamed out. 
*/ @@ -664,6 +680,10 @@ clear_unused_block_pointer (void) walk_tree (gimple_op_ptr (stmt, i), clear_unused_block_pointer_1, NULL, NULL); } + + /* Walk all variables mentioned in the functions BLOCK tree and clear + DECL_VALUE_EXPR from unused blocks where present. */ + clear_unused_block_pointer_in_block (DECL_INITIAL (current_function_decl)); } /* Dump scope blocks starting at SCOPE to FILE. INDENT is the
[gcc r14-10811] tree-optimization/116481 - avoid building function_type[]
https://gcc.gnu.org/g:8d8b8ed7835a1a03932a8c90c7c725f9903450d5 commit r14-10811-g8d8b8ed7835a1a03932a8c90c7c725f9903450d5 Author: Richard Biener Date: Sun Oct 13 11:42:27 2024 +0200 tree-optimization/116481 - avoid building function_type[] The following avoids building an array type with function or method element type during diagnosing an array bound violation as this will result in an error, rejecting a program with a not too useful error message. Instead build such array type manually. PR tree-optimization/116481 * pointer-query.cc (build_printable_array_type): Build an array types with function or method element type manually to avoid bogus diagnostic. * gcc.dg/pr116481.c: New testcase. (cherry picked from commit 1506027347776a2f6ec5b92d56ef192e85944e2e) Diff: --- gcc/pointer-query.cc| 11 +++ gcc/testsuite/gcc.dg/pr116481.c | 13 + 2 files changed, 24 insertions(+) diff --git a/gcc/pointer-query.cc b/gcc/pointer-query.cc index ccf9d823870a..002c8ed2162c 100644 --- a/gcc/pointer-query.cc +++ b/gcc/pointer-query.cc @@ -2587,6 +2587,17 @@ array_elt_at_offset (tree artype, HOST_WIDE_INT off, tree build_printable_array_type (tree eltype, unsigned HOST_WIDE_INT nelts) { + /* Cannot build an array type of functions or methods without + an error diagnostic. 
*/ + if (FUNC_OR_METHOD_TYPE_P (eltype)) +{ + tree arrtype = make_node (ARRAY_TYPE); + TREE_TYPE (arrtype) = eltype; + TYPE_SIZE (arrtype) = bitsize_zero_node; + TYPE_SIZE_UNIT (arrtype) = size_zero_node; + return arrtype; +} + if (TYPE_SIZE_UNIT (eltype) && TREE_CODE (TYPE_SIZE_UNIT (eltype)) == INTEGER_CST && !integer_zerop (TYPE_SIZE_UNIT (eltype)) diff --git a/gcc/testsuite/gcc.dg/pr116481.c b/gcc/testsuite/gcc.dg/pr116481.c new file mode 100644 index ..3ee6d7470876 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr116481.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Warray-bounds" } */ + +extern void tramp (); + +int is_trampoline (void* function) /* { dg-bogus "arrays of functions are not meaningful" } */ +{ + void* tramp_address = tramp; + if (!(((unsigned long)function & 3) == 2)) +return 0; + return (((long *) ((char*)function - 2))[0] + == ((long *) ((char*)tramp_address-2))[0]); /* { dg-warning "outside array bounds" } */ +}
[gcc r14-10810] tree-optimization/116290 - fix compare-debug issue in ldist
https://gcc.gnu.org/g:69934cb171fdd9d58dd64bb1811afaf43f6f7e44 commit r14-10810-g69934cb171fdd9d58dd64bb1811afaf43f6f7e44 Author: Richard Biener Date: Sun Oct 13 15:12:44 2024 +0200 tree-optimization/116290 - fix compare-debug issue in ldist Loop distribution does different analysis with -g0/-g due to counting a debug stmt starting a BB against a limit which will eventually lead to different IVOPTs choices. I've fixed a possible IVOPTs issue on the way even though it doesn't make a difference here. PR tree-optimization/116290 * tree-loop-distribution.cc (determine_reduction_stmt_1): PHIs have no debug variants. Start with first non-debug real stmt. * tree-ssa-loop-ivopts.cc (find_givs_in_bb): Do not analyze debug stmts. * gcc.dg/pr116290.c: New testcase. (cherry picked from commit 566740013b3445162b8c4bc2205e4e568d014968) Diff: --- gcc/testsuite/gcc.dg/pr116290.c | 18 ++ gcc/tree-loop-distribution.cc | 6 +++--- gcc/tree-ssa-loop-ivopts.cc | 3 ++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/pr116290.c b/gcc/testsuite/gcc.dg/pr116290.c new file mode 100644 index ..97b946bda893 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr116290.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-g -O2 -fcompare-debug" } */ + +char *camel_message_info_class_intern_init_part; +void g_once_init_enter(); +void camel_message_info_class_intern_init() { + int ii; + char *label; + for (; camel_message_info_class_intern_init_part[ii]; ii++) +if (camel_message_info_class_intern_init_part) { + if (label && *label) +g_once_init_enter(); + label = &camel_message_info_class_intern_init_part[ii + 1]; + camel_message_info_class_intern_init_part[ii] = ' '; +} + if (label) +g_once_init_enter(); +} diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index cb804ba48ffe..2bbd8da33e86 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -3551,7 +3551,7 @@ determine_reduction_stmt_1 (const loop_p loop, 
const basic_block *bbs) basic_block bb = bbs[i]; for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); - gsi_next_nondebug (&bsi)) + gsi_next (&bsi)) { gphi *phi = bsi.phi (); if (virtual_operand_p (gimple_phi_result (phi))) @@ -3564,8 +3564,8 @@ determine_reduction_stmt_1 (const loop_p loop, const basic_block *bbs) } } - for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); - gsi_next_nondebug (&bsi), ++ninsns) + for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); + !gsi_end_p (bsi); gsi_next_nondebug (&bsi), ++ninsns) { /* Bail out early for loops which are unlikely to match. */ if (ninsns > 16) diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 7cae5bdefea3..a904910999f5 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -1460,7 +1460,8 @@ find_givs_in_bb (struct ivopts_data *data, basic_block bb) gimple_stmt_iterator bsi; for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) -find_givs_in_stmt (data, gsi_stmt (bsi)); +if (!is_gimple_debug (gsi_stmt (bsi))) + find_givs_in_stmt (data, gsi_stmt (bsi)); } /* Finds general ivs. */
[gcc r14-10813] tree-optimization/116982 - analyze scalar loop exit early
https://gcc.gnu.org/g:1d11536881e60f36a2b8ad9919169ac7a8bc0e3e commit r14-10813-g1d11536881e60f36a2b8ad9919169ac7a8bc0e3e Author: Richard Biener Date: Mon Oct 7 11:05:17 2024 +0200 tree-optimization/116982 - analyze scalar loop exit early The following makes sure to discover the scalar loop IV exit during analysis as failure to do so (if DCE and friends are disabled this can happen due to if-conversion doing DCE and FRE on the if-converted loop) would ICE later. I refrained from larger refactoring to be able to eventually backport. PR tree-optimization/116982 * tree-vectorizer.h (vect_analyze_loop): Pass in .LOOP_VECTORIZED call. (vect_analyze_loop_form): Likewise. * tree-vect-loop.cc (vect_analyze_loop_form): Reject loops where we cannot determine a IV exit for the scalar loop. (vect_analyze_loop): Adjust. * tree-vectorizer.cc (try_vectorize_loop_1): Likewise. * tree-parloops.cc (gather_scalar_reductions): Likewise. (cherry picked from commit 9b86efd5210101954bd187c3aa8bb909610a5746) Diff: --- gcc/tree-parloops.cc | 4 ++-- gcc/tree-vect-loop.cc | 23 +++ gcc/tree-vectorizer.cc | 3 ++- gcc/tree-vectorizer.h | 6 -- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/gcc/tree-parloops.cc b/gcc/tree-parloops.cc index 888a834faf91..4d7a4ec94378 100644 --- a/gcc/tree-parloops.cc +++ b/gcc/tree-parloops.cc @@ -3304,7 +3304,7 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list vec_info_shared shared; vect_loop_form_info info; - if (!vect_analyze_loop_form (loop, &info)) + if (!vect_analyze_loop_form (loop, NULL, &info)) goto gather_done; simple_loop_info = vect_create_loop_vinfo (loop, &shared, &info); @@ -3346,7 +3346,7 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list { vec_info_shared shared; vect_loop_form_info info; - if (vect_analyze_loop_form (loop->inner, &info)) + if (vect_analyze_loop_form (loop->inner, NULL, &info)) { simple_loop_info = vect_create_loop_vinfo (loop->inner, &shared, 
&info); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 744044735d39..dfb9d1be6670 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1734,7 +1734,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) niter could be analyzed under some assumptions. */ opt_result -vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info) +vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call, + vect_loop_form_info *info) { DUMP_VECT_SCOPE ("vect_analyze_loop_form"); @@ -1744,6 +1745,18 @@ vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info) "not vectorized:" " could not determine main exit from" " loop with multiple exits.\n"); + if (loop_vectorized_call) +{ + tree arg = gimple_call_arg (loop_vectorized_call, 1); + class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); + edge scalar_exit_e = vec_init_loop_exit_info (scalar_loop); + if (!scalar_exit_e) + return opt_result::failure_at (vect_location, + "not vectorized:" + " could not determine main exit from" + " loop with multiple exits.\n"); +} + info->loop_exit = exit_e; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1815,7 +1828,7 @@ vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info) /* Analyze the inner-loop. */ vect_loop_form_info inner; - opt_result res = vect_analyze_loop_form (loop->inner, &inner); + opt_result res = vect_analyze_loop_form (loop->inner, NULL, &inner); if (!res) { if (dump_enabled_p ()) @@ -3570,7 +3583,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, for it. The different analyses will record information in the loop_vec_info struct. 
*/ opt_loop_vec_info -vect_analyze_loop (class loop *loop, vec_info_shared *shared) +vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, + vec_info_shared *shared) { DUMP_VECT_SCOPE ("analyze_loop_nest"); @@ -3588,7 +3602,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) /* Analyze the loop form. */ vect_loop_form_info loop_form_info; - opt_result res = vect_analyze_loop_form (loop, &loop_form_info); + opt_result res = vect_analyze_loop_form (loop, loop_vectorized_call, + &loop_form_info); if (!res) { if (dump_enabled_p ()) diff --git a/gcc/tree-v
[gcc r14-10814] tree-optimization/117104 - add missed guards to max(a, b) != a simplification
https://gcc.gnu.org/g:44c3eba2dfa71cb7cd9f8c3e7f33ef2b08132a51 commit r14-10814-g44c3eba2dfa71cb7cd9f8c3e7f33ef2b08132a51 Author: Richard Biener Date: Sat Oct 12 14:51:37 2024 +0200 tree-optimization/117104 - add missed guards to max(a,b) != a simplification For vector types we have to make sure the comparison result is a vector type and the resulting compare operation is supported. As the resulting compare is never an equality compare I didn't bother to check for the cbranch case. PR tree-optimization/117104 * match.pd ((cmp:c (minmax:c @0 @1) @0) -> (out @0 @1)): Properly guard the vector case. * gcc.dg/pr117104.c: New testcase. (cherry picked from commit f54d42e7e7a558b273d87f95b3e5b1938f5a) Diff: --- gcc/match.pd| 6 +- gcc/testsuite/gcc.dg/pr117104.c | 12 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 41afdfbe59de..62edaf1267e3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4167,7 +4167,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) out(le gt gt le ge lt lt ge ) (simplify (cmp:c (minmax:c @0 @1) @0) - (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))) + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (!VECTOR_TYPE_P (TREE_TYPE (@0)) + || (VECTOR_TYPE_P (type) + && (!expand_vec_cmp_expr_p (TREE_TYPE (@0), type, cmp) + || expand_vec_cmp_expr_p (TREE_TYPE (@0), type, out) (out @0 @1 /* MIN (X, 5) == 0 -> X == 0 MIN (X, 5) == 7 -> false */ diff --git a/gcc/testsuite/gcc.dg/pr117104.c b/gcc/testsuite/gcc.dg/pr117104.c new file mode 100644 index ..9aa5734f7927 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr117104.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-vect-cost-model" } */ +/* { dg-additional-options "-mavx" { target { x86_64-*-* i?86-*-* } } } */ + +void g(); +void f(long *a) +{ + long b0 = a[0] > 0 ? a[0] : 0; + long b1 = a[1] > 0 ? a[1] : 0; + if ((b0|b1) == 0) +g(); +}
[gcc r15-4515] libstdc++: Fix order of [[...]] and __attribute__((...)) attrs [PR117220]
https://gcc.gnu.org/g:cba80691251efccf44ab9aecb26558319605c9ea commit r15-4515-gcba80691251efccf44ab9aecb26558319605c9ea Author: Jonathan Wakely Date: Mon Oct 21 12:09:36 2024 +0100 libstdc++: Fix order of [[...]] and __attribute__((...)) attrs [PR117220] GCC allows these in either order, but Clang doesn't like the C++11-style [[__nodiscard__]] coming after __attribute__((__always_inline__)). libstdc++-v3/ChangeLog: PR libstdc++/117220 * include/bits/stl_iterator.h: Move _GLIBCXX_NODISCARD annotations before __attribute__((__always_inline__)). Diff: --- libstdc++-v3/include/bits/stl_iterator.h | 46 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_iterator.h b/libstdc++-v3/include/bits/stl_iterator.h index 26c5eab4b4e8..1fbc115b1163 100644 --- a/libstdc++-v3/include/bits/stl_iterator.h +++ b/libstdc++-v3/include/bits/stl_iterator.h @@ -1077,13 +1077,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Forward iterator requirements - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR reference operator*() const _GLIBCXX_NOEXCEPT { return *_M_current; } - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR pointer operator->() const _GLIBCXX_NOEXCEPT @@ -1123,7 +1123,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Random access iterator requirements - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR reference operator[](difference_type __n) const _GLIBCXX_NOEXCEPT @@ -1135,7 +1135,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION operator+=(difference_type __n) _GLIBCXX_NOEXCEPT { _M_current += __n; return *this; } - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR __normal_iterator operator+(difference_type __n) const _GLIBCXX_NOEXCEPT @@ 
-1147,13 +1147,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION operator-=(difference_type __n) _GLIBCXX_NOEXCEPT { _M_current -= __n; return *this; } - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR __normal_iterator operator-(difference_type __n) const _GLIBCXX_NOEXCEPT { return __normal_iterator(_M_current - __n); } - __attribute__((__always_inline__)) _GLIBCXX_NODISCARD + _GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR const _Iterator& base() const _GLIBCXX_NOEXCEPT @@ -1209,7 +1209,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #else // Forward iterator requirements template -__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR +_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool operator==(const __normal_iterator<_IteratorL, _Container>& __lhs, const __normal_iterator<_IteratorR, _Container>& __rhs) @@ -1217,7 +1217,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return __lhs.base() == __rhs.base(); } template -__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR +_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool operator==(const __normal_iterator<_Iterator, _Container>& __lhs, const __normal_iterator<_Iterator, _Container>& __rhs) @@ -1225,7 +1225,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return __lhs.base() == __rhs.base(); } template -__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR +_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool operator!=(const __normal_iterator<_IteratorL, _Container>& __lhs, const __normal_iterator<_IteratorR, _Container>& __rhs) @@ -1233,7 +1233,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return __lhs.base() != __rhs.base(); } template -__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR +_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool 
operator!=(const __normal_iterator<_Iterator, _Container>& __lhs, const __normal_iterator<_Iterator, _Container>& __rhs) @@ -1242,7 +1242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Random access iterator requirements template -__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR +_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool operator<(const __normal_iterator<_IteratorL, _Container>& __
[gcc r15-4517] libstdc++: Improve 26_numerics/headers/cmath/types_std_c++0x_neg.cc
https://gcc.gnu.org/g:d0d99fc6b6c4f1c3fa8a9427f461103c78ab457b commit r15-4517-gd0d99fc6b6c4f1c3fa8a9427f461103c78ab457b Author: Jonathan Wakely Date: Fri Oct 18 12:02:45 2024 +0100 libstdc++: Improve 26_numerics/headers/cmath/types_std_c++0x_neg.cc This test checks that the special functions in <cmath> are not declared prior to C++17. But we can remove the target selector and allow it to be tested for C++17 and later, and add target selectors to the individual dg-error directives instead. Also rename the test to match what it actually tests. libstdc++-v3/ChangeLog: * testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc: Move to ... * testsuite/26_numerics/headers/cmath/specfun_c++17.cc: here and adjust test to be valid for all -std dialects. Diff: --- .../{types_std_c++0x_neg.cc => specfun_c++17.cc} | 47 +++--- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc b/libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc similarity index 57% rename from libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc rename to libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc index 977f800a4b07..efb60ea1fbbe 100644 --- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc +++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc @@ -1,4 +1,4 @@ -// { dg-do compile { target { ! c++17 } } } +// { dg-do compile } // Copyright (C) 2007-2024 Free Software Foundation, Inc. // @@ -21,28 +21,29 @@ namespace gnu { - // C++11 changes from TR1. 
- using std::assoc_laguerre; // { dg-error "has not been declared" } - using std::assoc_legendre; // { dg-error "has not been declared" } - using std::beta; // { dg-error "has not been declared" } - using std::comp_ellint_1;// { dg-error "has not been declared" } - using std::comp_ellint_2;// { dg-error "has not been declared" } - using std::comp_ellint_3;// { dg-error "has not been declared" } + // C++17 additions from TR1. + using std::assoc_laguerre; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::assoc_legendre; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::beta; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::comp_ellint_1;// { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::comp_ellint_2;// { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::comp_ellint_3;// { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::cyl_bessel_i; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::cyl_bessel_j; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::cyl_bessel_k; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::cyl_neumann; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::ellint_1; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::ellint_2; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::ellint_3; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::expint; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::hermite; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::laguerre; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::legendre; // { dg-error "has not been declared" "" { target { ! 
c++17 } } } + using std::riemann_zeta; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::sph_bessel; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::sph_legendre; // { dg-error "has not been declared" "" { target { ! c++17 } } } + using std::sph_neumann; // { dg-error "has not been declared" "" { target { ! c++17 } } } + // These two were in TR1 but not added to C++17. using std::conf_hyperg; // { dg-error "has not been declared" } - using std::cyl_bessel_i; // { dg-error "has not been declared" } - using std::cyl_bessel_j; // { dg-error "has not been declared" } - using std::cyl_bessel_k; // { dg-error "has not been declared" } - using std::cyl_neumann; // { dg-error "has not been declared" } - using std::ellint_1; // { dg-error "has not been declared" } - using std::ellint_2; // { dg-error "has not been declared" } - using std::ellint_3; // { dg-error "has not been declared" } - using std::expint; // { dg-error "has not been declared" } - using std::h
[gcc r15-4516] libstdc++: Simplify C++98 std::vector::_M_data_ptr overload set
https://gcc.gnu.org/g:1003a428154cd2e556c1fba994d4f3ea2442fc95 commit r15-4516-g1003a428154cd2e556c1fba994d4f3ea2442fc95 Author: Jonathan Wakely Date: Fri Oct 18 11:55:08 2024 +0100 libstdc++: Simplify C++98 std::vector::_M_data_ptr overload set We don't need separate overloads for returning a const or non-const pointer. We can make the member function const and return a non-const pointer, and let vector::data() const convert it to const as needed. libstdc++-v3/ChangeLog: * include/bits/stl_vector.h (vector::_M_data_ptr): Remove non-const overloads. Always return non-const pointer. Diff: --- libstdc++-v3/include/bits/stl_vector.h | 12 +--- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index e284536ad31e..8982ca2b9eee 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -2034,20 +2034,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _M_data_ptr(_Ptr __ptr) const { return empty() ? nullptr : std::__to_address(__ptr); } #else - template - _Up* - _M_data_ptr(_Up* __ptr) _GLIBCXX_NOEXCEPT - { return __ptr; } - template value_type* - _M_data_ptr(_Ptr __ptr) - { return empty() ? (value_type*)0 : __ptr.operator->(); } - - template - const value_type* _M_data_ptr(_Ptr __ptr) const - { return empty() ? (const value_type*)0 : __ptr.operator->(); } + { return empty() ? (value_type*)0 : __ptr.operator->(); } #endif };
[gcc r15-4518] pair-fusion: Assume alias conflict if common address reg changes [PR116783]
https://gcc.gnu.org/g:c0e54ce1999ccf2241f74c5188b11b92e5aedc1f commit r15-4518-gc0e54ce1999ccf2241f74c5188b11b92e5aedc1f Author: Alex Coplan Date: Fri Sep 20 17:39:39 2024 +0100 pair-fusion: Assume alias conflict if common address reg changes [PR116783] As the PR shows, pair-fusion was tricking memory_modified_in_insn_p into returning false when a common base register (in this case, x1) was modified between the mem and the store insn. This led to wrong code as the accesses really did alias. To avoid this sort of problem, this patch avoids invoking RTL alias analysis altogether (and assumes an alias conflict) if the two insns to be compared share a common address register R, and the insns see different definitions of R (i.e. it was modified in between). gcc/ChangeLog: PR rtl-optimization/116783 * pair-fusion.cc (def_walker::cand_addr_uses): New. (def_walker::def_walker): Add parameter for candidate address uses. (def_walker::alias_conflict_p): Declare. (def_walker::addr_reg_conflict_p): New. (def_walker::conflict_p): New. (store_walker::store_walker): Add parameter for candidate address uses and pass to base ctor. (store_walker::conflict_p): Rename to ... (store_walker::alias_conflict_p): ... this. (load_walker::load_walker): Add parameter for candidate address uses and pass to base ctor. (load_walker::conflict_p): Rename to ... (load_walker::alias_conflict_p): ... this. (pair_fusion_bb_info::try_fuse_pair): Collect address register uses for candidate insns and pass down to alias walkers. gcc/testsuite/ChangeLog: PR rtl-optimization/116783 * g++.dg/torture/pr116783.C: New test. 
Diff: --- gcc/pair-fusion.cc | 127 +--- gcc/testsuite/g++.dg/torture/pr116783.C | 98 2 files changed, 213 insertions(+), 12 deletions(-) diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc index 653055fdcf67..ccbb5511e9d1 100644 --- a/gcc/pair-fusion.cc +++ b/gcc/pair-fusion.cc @@ -2089,11 +2089,80 @@ protected: def_iter_t def_iter; insn_info *limit; - def_walker (def_info *def, insn_info *limit) : -def_iter (def), limit (limit) {} + + // Array of register uses from the candidate insn which occur in MEMs. + use_array cand_addr_uses; + + def_walker (def_info *def, insn_info *limit, use_array addr_uses) : +def_iter (def), limit (limit), cand_addr_uses (addr_uses) {} virtual bool iter_valid () const { return *def_iter; } + // Implemented in {load,store}_walker. + virtual bool alias_conflict_p (int &budget) const = 0; + + // Return true if the current (walking) INSN () uses a register R inside a + // MEM, where R is also used inside a MEM by the (static) candidate insn, and + // those uses see different definitions of that register. In this case we + // can't rely on RTL alias analysis, and for now we conservatively assume that + // there is an alias conflict. See PR116783. + bool addr_reg_conflict_p () const + { +use_array curr_insn_uses = insn ()->uses (); +auto cand_use_iter = cand_addr_uses.begin (); +auto insn_use_iter = curr_insn_uses.begin (); +while (cand_use_iter != cand_addr_uses.end () + && insn_use_iter != curr_insn_uses.end ()) + { + auto insn_use = *insn_use_iter; + auto cand_use = *cand_use_iter; + if (insn_use->regno () > cand_use->regno ()) + cand_use_iter++; + else if (insn_use->regno () < cand_use->regno ()) + insn_use_iter++; + else + { + // As it stands I believe the alias code (memory_modified_in_insn_p) + // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should + // be safe to skip over uses that only occur in notes. 
+ if (insn_use->includes_address_uses () + && !insn_use->only_occurs_in_notes () + && insn_use->def () != cand_use->def ()) + { + if (dump_file) + { + fprintf (dump_file, +"assuming aliasing of cand i%d and i%d:\n" +"-> insns see different defs of common addr reg r%u\n" +"-> ", +cand_use->insn ()->uid (), insn_use->insn ()->uid (), +insn_use->regno ()); + + // Note that while the following sequence could be made more + // concise by eliding pp_string calls into the pp_printf + // calls, doing so triggers -Wformat-diag. + pretty_printer pp; + pp_string (&pp, "["); + pp_access (&pp, cand_use, 0); + pp_st
[gcc r15-4519] amdgcn: silence warning
https://gcc.gnu.org/g:0b6d94ce72b2f35dbee7c42774d6972671c86f97 commit r15-4519-g0b6d94ce72b2f35dbee7c42774d6972671c86f97 Author: Andrew Stubbs Date: Mon Sep 16 12:31:59 2024 +0000 amdgcn: silence warning FIRST_SGPR_REG is register zero so the compiler always claims this comparison is redundant. It's right, of course, but I'd have preferred to keep the comparison for completeness. Probably the "correct" solution is to use an enum for these values. gcc/ChangeLog: * config/gcn/gcn.h (SGPR_REGNO_P): Silence warning. Diff: --- gcc/config/gcn/gcn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index 1a4631dd39f6..faefe68cdfa9 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -191,7 +191,7 @@ STATIC_ASSERT (LAST_AVGPR_REG + 1 - FIRST_AVGPR_REG == 256); #define HARD_FRAME_POINTER_IS_ARG_POINTER 0 #define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 -#define SGPR_REGNO_P(N)((N) >= FIRST_SGPR_REG && (N) <= LAST_SGPR_REG) +#define SGPR_REGNO_P(N)(/*(N) >= FIRST_SGPR_REG &&*/ (N) <= LAST_SGPR_REG) #define VGPR_REGNO_P(N)((N) >= FIRST_VGPR_REG && (N) <= LAST_VGPR_REG) #define AVGPR_REGNO_P(N)((N) >= FIRST_AVGPR_REG && (N) <= LAST_AVGPR_REG) #define SSRC_REGNO_P(N)((N) <= SCC_REG && (N) != VCCZ_REG)
[gcc(refs/users/meissner/heads/work181-sha)] Revert changes
https://gcc.gnu.org/g:8cce3a176a4edbcc4e7dfb4f0459a217abbdcd31 commit 8cce3a176a4edbcc4e7dfb4f0459a217abbdcd31 Author: Michael Meissner Date: Mon Oct 21 13:10:54 2024 -0400 Revert changes Diff: --- gcc/config/rs6000/fusion.md| 352 - gcc/config/rs6000/genfusion.pl | 4 +- 2 files changed, 178 insertions(+), 178 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 6f9081ab3372..215a3aae074f 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1872,16 +1872,16 @@ ;; vector vand -> vand (define_insn "*fuse_vand_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v") - (match_operand:VM 1 "vector_fusion_operand" "%wa,v,v,v,v")) - (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) +(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ - xxeval %x3,%x2,%x1,%x0,1 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1893,16 +1893,16 @@ ;; vector vandc -> vand (define_insn "*fuse_vandc_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v")) - (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v")) - (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) +(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ - xxeval 
%x3,%x2,%x1,%x0,2 vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,2 vandc %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1914,16 +1914,16 @@ ;; vector veqv -> vand (define_insn "*fuse_veqv_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v") - (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v"))) - (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) +(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ - xxeval %x3,%x2,%x1,%x0,9 veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,9 veqv %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1935,16 +1935,16 @@ ;; vector vnand -> vand (define_insn "*fuse_vnand_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v")) - (not:VM (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v"))) - (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v"))) +(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v")) + (not:VM (match_operand:VM 1 "vector_fusion_operand" "v,v,v,wa,v"))) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ - xxeval %x3,%x2,%x1,%x0,14 vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,14 vnand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") @@ -1956,16 +1956,16 @@ ;; vector 
vnor -> vand (define_insn "*fuse_vnor_vand" [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") -(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" "wa,v,v,v,v")) - (not:VM (match_operand:VM 1 "vector_fusion_operand" "wa,v,v,v,v"))) - (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,
[gcc r15-4529] RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:f411abe7935e01b7e61f966d12a7a0850ca8f1c0 commit r15-4529-gf411abe7935e01b7e61f966d12a7a0850ca8f1c0 Author: Pan Li Date: Mon Oct 14 14:55:56 2024 +0800 RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC Form 6: #define DEF_VEC_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_6 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN >= x || x > (WT)NT_MAX \ ? x < 0 ? NT_MIN : NT_MAX \ : trunc; \ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-6-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-6-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-6-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c new file mode 100644 index ..c97057355c40 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_6(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c new file mode 100644 index ..629c07347bb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_6(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4527] RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:f30ca9867a77c78f3a48bc124ab3bc4ce32283fa commit r15-4527-gf30ca9867a77c78f3a48bc124ab3bc4ce32283fa Author: Pan Li Date: Mon Oct 14 11:41:02 2024 +0800 RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC Form 4: #define DEF_VEC_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN <= x && x < (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-4-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-4-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-4-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c new file mode 100644 index ..2ac96aa1a35b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_4(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c new file mode 100644 index ..7fe8f2774767 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_4(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4524] RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:1f3a9c08aff9aac53d6c12b658efc222cf91de9c commit r15-4524-g1f3a9c08aff9aac53d6c12b658efc222cf91de9c Author: Pan Li Date: Mon Oct 14 10:21:39 2024 +0800 RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC Form 1: #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: Add test data for signed SAT_TRUNC. * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/unop/vec_sat_data.h | 291 + .../rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c | 16 ++ .../unop/vec_sat_s_trunc-run-1-i32-to-i16.c| 16 ++ .../autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c | 16 ++ .../unop/vec_sat_s_trunc-run-1-i64-to-i16.c| 16 ++ .../unop/vec_sat_s_trunc-run-1-i64-to-i32.c| 16 ++ .../autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c | 16 ++ .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 14 files changed, 463 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h index 6b23ec809f6c..a3643c5e1218 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h @@ -25,6 +25,15 @@ TEST_UNARY_STRUCT(uint16_t, uint64_t) TEST_UNARY_STRUCT(uint32_t, uint64_t) +TEST_UNARY_STRUCT(int8_t, int16_t) +TEST_UNARY_STRUCT(int8_t, int32_t) +TEST_UNARY_STRUCT(int8_t, int64_t) + +TEST_UNARY_STRUCT(int16_t, int32_t) +TEST_UNARY_STRUCT(int16_t, int64_t) + +TEST_UNARY_STRUCT(int32_t, int64_t) + TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \ TEST_UNARY_DATA(uint8_t, uint16_t)[] = { @@ -391,4 +400,286 @@ TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \ }, }; +TEST_UNARY_STRUCT_DECL(int8_t, int16_t) \ + TEST_UNARY_DATA(int8_t, int16_t)[] = +{ + { +{ + 0, 0, 0, 0, + -1, -1, -1, -1, + 1, 1, 1, 1, + 2, 2, 2, 2, +}, +{ + 0, 0, 0, 0, + -1, -1, -1, -1, + 1, 1, 1, 1, + 2, 2, 2, 2, +}, + }, + { +{ + 127, 127, 127, 127, + 128, 128, 128, 128, + -128, -128, -128, -
[gcc r15-4526] RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:efa1617bfc095e0667df31a6f3a2c0319afbc8d0 commit r15-4526-gefa1617bfc095e0667df31a6f3a2c0319afbc8d0 Author: Pan Li Date: Mon Oct 14 11:26:06 2024 +0800 RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC Form 3: #define DEF_VEC_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-3-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-3-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-3-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c new file mode 100644 index ..392366def060 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_3(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c new file mode 100644 index ..2b16049994a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_3(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4525] RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:033900fc175bbd67fd1a8c8f7410a21f8b04eda2 commit r15-4525-g033900fc175bbd67fd1a8c8f7410a21f8b04eda2 Author: Pan Li Date: Mon Oct 14 11:09:55 2024 +0800 RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC Form 2: #define DEF_VEC_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-2-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-2-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-2-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c new file mode 100644 index ..3e26e788c083 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_2(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c new file mode 100644 index ..63797705a04a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_2(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4528] RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC
https://gcc.gnu.org/g:108c8ef03dd5dff96fd3a4aa31088e42d98a0624 commit r15-4528-g108c8ef03dd5dff96fd3a4aa31088e42d98a0624 Author: Pan Li Date: Mon Oct 14 14:41:22 2024 +0800 RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC Form 5: #define DEF_VEC_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_5 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN > x || x > (WT)NT_MAX \ ? x < 0 ? NT_MIN : NT_MAX \ : trunc; \ } \ } The below test are passed for this patch. * The rv64gcv fully regression test. It is test only patch and obvious up to a point, will commit it directly if no comments in next 48H. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i16.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i32.c: New test. * gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c| 9 + .../autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c| 9 + .../rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c | 9 + .../autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-5-i32-to-i16.c| 16 .../autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c | 16 .../unop/vec_sat_s_trunc-run-5-i64-to-i16.c| 16 .../unop/vec_sat_s_trunc-run-5-i64-to-i32.c| 16 .../autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c | 16 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h | 22 ++ 13 files changed, 172 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c new file mode 100644 index ..49c076ad2779 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_5(int8_t, int16_t, INT8_MIN, INT8_MAX) + +/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */ +/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c new file mode 100644 index ..a2a1aa40e017 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "../vec_sat_arith.h" + +DEF_VEC_SAT_S_TRUNC_FMT_5(int16_t, int32_t, INT16_MIN, INT16_MAX) + +/* { dg-final { 
scan-rtl-dump-times
[gcc r15-4523] RISC-V: Implement vector SAT_TRUNC for signed integer
https://gcc.gnu.org/g:b5a058154179ab16fe5f9e6aa331624363410aad commit r15-4523-gb5a058154179ab16fe5f9e6aa331624363410aad Author: Pan Li Date: Mon Oct 14 10:14:31 2024 +0800 RISC-V: Implement vector SAT_TRUNC for signed integer This patch would like to implement the sstrunc for vector signed integer. Form 1: #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX) Before this patch: 27 │ vsetvli a5,a2,e64,m1,ta,ma 28 │ vle64.v v1,0(a1) 29 │ slli a3,a5,3 30 │ slli a4,a5,2 31 │ sub a2,a2,a5 32 │ add a1,a1,a3 33 │ vadd.vv v0,v1,v5 34 │ vsetvli zero,zero,e32,mf2,ta,ma 35 │ vnsrl.wx v2,v1,a6 36 │ vncvt.x.x.w v1,v1 37 │ vsetvli zero,zero,e64,m1,ta,ma 38 │ vmsgtu.vv v0,v0,v4 39 │ vsetvli zero,zero,e32,mf2,ta,mu 40 │ vneg.v v2,v2 41 │ vxor.vv v1,v2,v3,v0.t 42 │ vse32.v v1,0(a0) 43 │ add a0,a0,a4 44 │ bne a2,zero,.L3 After this patch: 16 │ vsetvli a5,a2,e32,mf2,ta,ma 17 │ vle64.v v1,0(a1) 18 │ slli a3,a5,3 19 │ slli a4,a5,2 20 │ sub a2,a2,a5 21 │ add a1,a1,a3 22 │ vnclip.wi v1,v1,0 23 │ vse32.v v1,0(a0) 24 │ add a0,a0,a4 25 │ bne a2,zero,.L3 The below test suites are passed for this patch. * The rv64gcv fully regression test. gcc/ChangeLog: * config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add new pattern sstrunc for double trunc. (sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc. (sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc. * config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add new func decl to expand double trunc. (expand_vec_quad_sstrunc): Ditto but for quad trunc. (expand_vec_oct_sstrunc): Ditto but for oct trunc. * config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new func to expand double trunc. (expand_vec_quad_sstrunc): Ditto but for quad trunc. 
(expand_vec_oct_sstrunc): Ditto but for oct trunc. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md | 34 ++ gcc/config/riscv/riscv-protos.h | 4 gcc/config/riscv/riscv-v.cc | 46 + 3 files changed, 84 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index a34f63c96516..774a3d337231 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2779,6 +2779,40 @@ } ) +(define_expand "sstrunc<mode><v_double_trunc>2" + [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (match_operand:VWEXTI 1 "register_operand")] + "TARGET_VECTOR" + { +riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1], + <MODE>mode); +DONE; + } +) + +(define_expand "sstrunc<mode><v_quad_trunc>2" + [(match_operand:<V_QUAD_TRUNC> 0 "register_operand") + (match_operand:VQEXTI 1 "register_operand")] + "TARGET_VECTOR" + { +riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode); +DONE; + } +) + +(define_expand "sstrunc<mode><v_oct_trunc>2" + [(match_operand:<V_OCT_TRUNC> 0 "register_operand") + (match_operand:VOEXTI 1 "register_operand")] + "TARGET_VECTOR" + { +riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode, + <V_DOUBLE_TRUNC>mode, + <V_QUAD_TRUNC>mode); +DONE; + } +) + ;; = ;; == Early break auto-vectorization patterns ;; = diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/risc
[gcc r15-4520] aarch64: Fix costing of move to/from MOVEABLE_SYSREGS
https://gcc.gnu.org/g:8193e71a07de010c041175e7a8acf62eeae5b336 commit r15-4520-g8193e71a07de010c041175e7a8acf62eeae5b336 Author: Andrew Carlotti Date: Thu Aug 22 11:59:33 2024 +0100 aarch64: Fix costing of move to/from MOVEABLE_SYSREGS This is necessary to prevent reload assuming that a direct FP->FPMR move is valid. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_register_move_cost): Increase costs involving MOVEABLE_SYSREGS. Diff: --- gcc/config/aarch64/aarch64.cc | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 0dbc2aaa99ff..21d9a6b5a20e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -15565,6 +15565,12 @@ aarch64_register_move_cost (machine_mode mode, reg_class_contents[FFR_REGS])) return 80; + /* Moves to/from sysregs are expensive, and must go via GPR. */ + if (from == MOVEABLE_SYSREGS) +return 80 + aarch64_register_move_cost (mode, GENERAL_REGS, to); + if (to == MOVEABLE_SYSREGS) +return 80 + aarch64_register_move_cost (mode, from, GENERAL_REGS); + /* Moving between GPR and stack cost is the same as GP2GP. */ if ((from == GENERAL_REGS && to == STACK_REG) || (to == GENERAL_REGS && from == STACK_REG))
[gcc r15-4522] Vect: Try the pattern of vector signed integer SAT_TRUNC
https://gcc.gnu.org/g:2987ca61003ee7d55b8b005ab4c9c679efc9558b commit r15-4522-g2987ca61003ee7d55b8b005ab4c9c679efc9558b Author: Pan Li Date: Mon Oct 14 10:09:31 2024 +0800 Vect: Try the pattern of vector signed integer SAT_TRUNC Almost the same as vector unsigned integer SAT_TRUNC, try to match the signed version during the vector pattern matching. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * tree-vect-patterns.cc (gimple_signed_integer_sat_trunc): Add new func decl for signed SAT_TRUNC. (vect_recog_sat_trunc_pattern): Try signed match pattern for the SAT_TRUNC. Signed-off-by: Pan Li Diff: --- gcc/tree-vect-patterns.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 746f100a0842..ce5a528141f7 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -4539,6 +4539,7 @@ extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree)); extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree)); extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree)); +extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree)); static gimple * vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info, @@ -4770,7 +4771,8 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, tree lhs = gimple_assign_lhs (last_stmt); tree otype = TREE_TYPE (lhs); - if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL) + if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL) + || gimple_signed_integer_sat_trunc (lhs, ops, NULL)) && type_has_mode_precision_p (otype)) { tree itype = TREE_TYPE (ops[0]);
[gcc r15-4521] Match: Support form 1 for vector signed integer SAT_TRUNC
https://gcc.gnu.org/g:bdbb74e38f30827568ba1224d52f5c86edb5d48c commit r15-4521-gbdbb74e38f30827568ba1224d52f5c86edb5d48c Author: Pan Li Date: Mon Oct 14 10:03:25 2024 +0800 Match: Support form 1 for vector signed integer SAT_TRUNC This patch would like to support the form 1 of the vector signed integer SAT_TRUNC. Aka below example: Form 1: #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \ void __attribute__((noinline))\ vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \ { \ unsigned i; \ for (i = 0; i < limit; i++) \ { \ WT x = in[i]; \ NT trunc = (NT)x; \ out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \ ? trunc \ : x < 0 ? NT_MIN : NT_MAX;\ } \ } DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX) Before this patch: 48 │ _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [2, 2]); 49 │ ivtmp_64 = _87 * 8; 50 │ vect_x_14.10_67 = .MASK_LEN_LOAD (vectp_in.8_65, 64B, { -1, ... }, _87, 0); 51 │ vect_trunc_15.21_78 = (vector([2,2]) int) vect_x_14.10_67; 52 │ _61 = VIEW_CONVERT_EXPR(vect_x_14.10_67); 53 │ _32 = _61 >> 63; 54 │ vect_patt_52.16_73 = (vector([2,2]) int) _32; 55 │ vect__46.17_74 = VIEW_CONVERT_EXPR(vect_patt_52.16_73); 56 │ vect__47.18_75 = -vect__46.17_74; 57 │ vect__21.19_76 = VIEW_CONVERT_EXPR(vect__47.18_75); 58 │ vect_x.11_68 = VIEW_CONVERT_EXPR(vect_x_14.10_67); 59 │ vect__5.12_69 = vect_x.11_68 + { 2147483648, ... }; 60 │ mask__34.13_70 = vect__5.12_69 > { 4294967295, ... }; 61 │ _25 = .COND_XOR (mask__34.13_70, vect__21.19_76, { 2147483647, ... }, vect_trunc_15.21_78); 62 │ ivtmp_80 = _87 * 4; 63 │ .MASK_LEN_STORE (vectp_out.23_81, 32B, { -1, ... }, _87, 0, _25); 64 │ vectp_in.8_66 = vectp_in.8_65 + ivtmp_64; 65 │ vectp_out.23_82 = vectp_out.23_81 + ivtmp_80; 66 │ ivtmp_86 = ivtmp_85 - _87; After this patch: 38 │ _77 = .SELECT_VL (ivtmp_75, POLY_INT_CST [2, 2]); 39 │ ivtmp_65 = _77 * 8; 40 │ vect_x_14.10_68 = .MASK_LEN_LOAD (vectp_in.8_66, 64B, { -1, ... 
}, _77, 0); 41 │ vect_patt_53.11_69 = .SAT_TRUNC (vect_x_14.10_68); 42 │ ivtmp_70 = _77 * 4; 43 │ .MASK_LEN_STORE (vectp_out.12_71, 32B, { -1, ... }, _77, 0, vect_patt_53.11_69); 44 │ vectp_in.8_67 = vectp_in.8_66 + ivtmp_65; 45 │ vectp_out.12_72 = vectp_out.12_71 + ivtmp_70; 46 │ ivtmp_76 = ivtmp_75 - _77; The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Refine matching for vector signed SAT_TRUNC form 1. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 12d81fcac0de..ec2038d48dc4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3482,7 +3482,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1 > Unsigned_MAX ? (NT)X. */ (match (signed_integer_sat_trunc @0) (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2) - (bit_xor:c (negate (convert (lt @0 integer_zerop))) INTEGER_CST@3) + (bit_xor:c (nop_convert? + (negate (nop_convert? (convert (lt @0 integer_zerop))))) + INTEGER_CST@3) (convert @0)) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && !TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4)))
[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: fix flags when cloning edges, add lto input and output
https://gcc.gnu.org/g:f2c71e4d68dce5a51aedd0f71a18eec4ad76ff17 commit f2c71e4d68dce5a51aedd0f71a18eec4ad76ff17 Author: Josef Melcr Date: Mon Oct 21 16:31:32 2024 +0200 omp-cp: fix flags when cloning edges, add lto input and output gcc/ChangeLog: * cgraph.cc (cgraph_edge::dump_edge_flags): add callback and has_callback printing * cgraphclones.cc (cgraph_edge::clone): copy over callback and has_callback flags * ipa-inline.cc (can_inline_edge_p): move callback condition to the beginning * lto-cgraph.cc (lto_output_edge): add outputting for callback flags (input_edge): add inputting of callback flags Signed-off-by: Josef Melcr Diff: --- gcc/cgraph.cc | 4 gcc/cgraphclones.cc | 2 ++ gcc/ipa-inline.cc | 10 +- gcc/lto-cgraph.cc | 4 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index 58813e8cc2d0..c62f5de807da 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -2113,6 +2113,10 @@ cgraph_edge::dump_edge_flags (FILE *f) { if (speculative) fprintf (f, "(speculative) "); + if (callback) +fprintf (f, "(callback) "); + if (has_callback) +fprintf (f, "(has_callback) "); if (!inline_failed) fprintf (f, "(inlined) "); if (call_stmt_cannot_inline_p) diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index 4fff6873a369..d52b72364d4d 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -144,6 +144,8 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, new_edge->can_throw_external = can_throw_external; new_edge->call_stmt_cannot_inline_p = call_stmt_cannot_inline_p; new_edge->speculative = speculative; + new_edge->callback = callback; + new_edge->has_callback = has_callback; new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor; /* Update IPA profile. Local profiles need no updating in original. 
*/ diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc index 0d77b89fa301..dacf1fd2691b 100644 --- a/gcc/ipa-inline.cc +++ b/gcc/ipa-inline.cc @@ -371,6 +371,11 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, { gcc_checking_assert (e->inline_failed); + if(e->callback) { +printf("skipping inline - callback\n"); +return false; + } + if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) { if (report) @@ -453,11 +458,6 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, if (!inlinable && report) report_inline_failed_reason (e); - if(e->callback) { -printf("skipping inline - tried to inline: %d\n", inlinable); -inlinable = false; - } - return inlinable; } diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index 1d4311a8832b..b7a7def31b79 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -272,6 +272,8 @@ lto_output_edge (struct lto_simple_output_block *ob, struct cgraph_edge *edge, bp_pack_value (&bp, edge->speculative_id, 16); bp_pack_value (&bp, edge->indirect_inlining_edge, 1); bp_pack_value (&bp, edge->speculative, 1); + bp_pack_value (&bp, edge->callback, 1); + bp_pack_value (&bp, edge->has_callback, 1); bp_pack_value (&bp, edge->call_stmt_cannot_inline_p, 1); gcc_assert (!edge->call_stmt_cannot_inline_p || edge->inline_failed != CIF_BODY_NOT_AVAILABLE); @@ -1524,6 +1526,8 @@ input_edge (class lto_input_block *ib, vec nodes, edge->indirect_inlining_edge = bp_unpack_value (&bp, 1); edge->speculative = bp_unpack_value (&bp, 1); + edge->callback = bp_unpack_value(&bp, 1); + edge->has_callback = bp_unpack_value(&bp, 1); edge->lto_stmt_uid = stmt_id; edge->speculative_id = speculative_id; edge->inline_failed = inline_failed;
[gcc(refs/users/omachota/heads/rtl-ssa-dce)] rtl-ssa: dce fix uid
https://gcc.gnu.org/g:cde5332b496943e584748870e65265549102077f commit cde5332b496943e584748870e65265549102077f Author: Ondřej Machota Date: Mon Oct 21 16:54:52 2024 +0200 rtl-ssa: dce fix uid Diff: --- gcc/dce.cc | 44 +++- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/gcc/dce.cc b/gcc/dce.cc index cde7d7f3c83d..716236d79c1b 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -1239,6 +1239,7 @@ namespace bool is_inherently_live(insn_info *insn) { + return insn->num_uses() > 0; } static void @@ -1262,6 +1263,17 @@ rtl_ssa_dce_done() fprintf(dump_file, "\nFinished running rtl_ssa_dce\n\n"); } +static void +rtl_ssa_dce_mark_live(insn_info *info, auto_vec worklist, sbitmap marked) { + int info_uid = info->uid(); + bitmap_set_bit(marked, info_uid); + if (dump_file) { +fprintf(dump_file, " Adding insn %d to worklist\n", info_uid); + } + + worklist.safe_push(info); +} + static void rtl_ssa_dce_mark(sbitmap marked) { @@ -1279,12 +1291,19 @@ rtl_ssa_dce_mark(sbitmap marked) */ // insn.defs() // UD chain - this is what I want - reach the ancestors\ // insn.uses() // DU chain + +/* +* For marking phi nodes, which don't have uid (insn->rtl() is null) by definition, use a dictionary and store their addresses +* Is seems, that insn->uid() is uniq enough +*/ + if (is_inherently_live(insn)) { if (dump_file) -fprintf(dump_file, " Adding insn %d to worklist\n", INSN_UID(insn->rtl())); +fprintf(dump_file, " Adding insn %d to worklist\n", insn->uid()); + rtl_ssa_dce_mark_live(insn, marked); worklist.safe_push(insn); - bitmap_set_bit(marked, INSN_UID(insn->rtl())); + bitmap_set_bit(marked, insn->uid()); } // if (insn->can_be_optimized () || insn->is_debug_insn ()) @@ -1301,12 +1320,13 @@ rtl_ssa_dce_mark(sbitmap marked) insn_info *parent_insn = defs[i]->insn(); - if (!bitmap_bit_p(marked, INSN_UID(parent_insn->rtl( + int parent_insn_uid = parent_insn->uid(); + if (!bitmap_bit_p(marked, parent_insn_uid)) { if (dump_file) - fprintf(dump_file, " Adding insn %d to worklist\n", 
INSN_UID(parent_insn->rtl())); + fprintf(dump_file, " Adding insn %d to worklist\n", parent_insn_uid); worklist.safe_push(parent_insn); -bitmap_set_bit(marked, INSN_UID(parent_insn->rtl())); +bitmap_set_bit(marked, parent_insn_uid); } } } @@ -1318,10 +1338,16 @@ rtl_ssa_dce_sweep(sbitmap marked) insn_info *next; for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next) { - if (!bitmap_bit_p(marked, INSN_UID(insn->rtl( { -insn->rtl()->set_deleted(); -// delete - } +if (!bitmap_bit_p(marked, insn->uid())) { + // rtx_insn* rtl = insn->rtl(); + // How to delete phis? + // if (rtl != nullptr) { + // delete_insn(rtl); + // } + // insn_change::delete_insn(insn); + crtl->ssa->possibly_queue_changes(insn_change::delete_insn(insn)) + // insn->rtl()->set_deleted(); +} } }
[gcc r15-4535] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1.
https://gcc.gnu.org/g:93b6f287814bca3d10bcf53bb64db40d77eff5d7 commit r15-4535-g93b6f287814bca3d10bcf53bb64db40d77eff5d7 Author: xuli Date: Mon Oct 21 04:01:01 2024 + RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1. form 1: T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } Passed the rv64gcv regression test. Change-Id: Idaa1ab41f2a5785112279ea8ee2c93236457b740 Signed-off-by: Li Xu gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_sub_imm-1_3.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_3.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_3.c: New test. * gcc.target/riscv/sat_u_sub_imm-4_1.c: New test. Diff: --- gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c | 21 ++ gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c | 23 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c | 25 ++ gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c | 20 + 4 files changed, 89 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c new file mode 100644 index ..6f2a493eebbe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm254_uint8_t_fmt_1: +** li\s+[atx][0-9]+,\s*254 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 254) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c new file mode 
100644 index ..ed03c186046a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm65534_uint16_t_fmt_1: +** li\s+[atx][0-9]+,\s*65536 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 65534) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c new file mode 100644 index ..17d8e5f0b9fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm4294967294_uint32_t_fmt_1: +** li\s+[atx][0-9]+,\s*1 +** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32 +** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2 +** slli\s+a0,\s*a0,\s*32 +** srli\s+a0,\s*a0,\s*32 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** sext\.w\s+a0,\s*a0 +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 4294967294) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c new file mode 100644 index ..e6492190d171 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm18446744073709551614u_uint64_t_fmt_1: +** li\s+[atx][0-9]+,\s*-2 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1
[gcc r15-4534] Match: Support IMM=max-1 for unsigned scalar .SAT_SUB IMM form 1
https://gcc.gnu.org/g:1dccec47ab679926521fd4c9963b63b319b56eb9 commit r15-4534-g1dccec47ab679926521fd4c9963b63b319b56eb9 Author: xuli Date: Tue Oct 22 01:08:56 2024 +0000 Match: Support IMM=max-1 for unsigned scalar .SAT_SUB IMM form 1 This patch would like to support .SAT_SUB when one of the op is IMM = max - 1 of form1. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return IMM >= y ? IMM - y : 0;\ } Take below form 1 as example: DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 254) Before this patch: __attribute__((noinline)) uint8_t sat_u_sub_imm254_uint8_t_fmt_1 (uint8_t y) { uint8_t _1; uint8_t _3; <bb 2> [local count: 1073741824]: if (y_2(D) != 255) goto <bb 3>; [66.00%] else goto <bb 4>; [34.00%] <bb 3> [local count: 708669600]: _3 = 254 - y_2(D); <bb 4> [local count: 1073741824]: # _1 = PHI <0(2), _3(3)> return _1; } After this patch: __attribute__((noinline)) uint8_t sat_u_sub_imm254_uint8_t_fmt_1 (uint8_t y) { uint8_t _1; <bb 2> [local count: 1073741824]: _1 = .SAT_SUB (254, y_2(D)); [tail call] return _1; } The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. Signed-off-by: Li Xu gcc/ChangeLog: * match.pd: Support IMM=max-1. Diff: --- gcc/match.pd | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index ec2038d48dc4..362bcac291fd 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3325,7 +3325,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1 -/* Unsigned saturation sub with op_0 imm, case 9 (branch with gt): +/* Unsigned saturation sub with op_0 imm, case 9 (branch with le): SAT_U_SUB = IMM > Y ? (IMM - Y) : 0. = IMM >= Y ? (IMM - Y) : 0.
*/ (match (unsigned_integer_sat_sub @0 @1) @@ -3344,6 +3344,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (equal_p || less_than_1_p) +/* The boundary condition for case 9: IMM = max -1 (branch with ne): + SAT_U_SUB = IMM >= Y ? (IMM - Y) : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (cond^ (ne @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @1)) +(with + { + unsigned precision = TYPE_PRECISION (type); + wide_int max = wi::mask (precision, false, precision); + wide_int c0 = wi::to_wide (@0); + wide_int c2 = wi::to_wide (@2); + wide_int c0_add_1 = wi::add (c0, wi::uhwi (1, precision)); + } + (if (wi::eq_p (c2, max) && wi::eq_p (c0_add_1, max)) + /* Unsigned saturation sub with op_1 imm, case 10: SAT_U_SUB = X > IMM ? (X - IMM) : 0. = X >= IMM ? (X - IMM) : 0. */
[gcc r15-4536] Match: Support IMM=1 for unsigned scalar .SAT_SUB IMM form 1
https://gcc.gnu.org/g:4e65e12a9a34d76f9a43fbc7ae32875a909ac708 commit r15-4536-g4e65e12a9a34d76f9a43fbc7ae32875a909ac708 Author: xuli Date: Mon Oct 21 04:08:46 2024 +0000 Match: Support IMM=1 for unsigned scalar .SAT_SUB IMM form 1 This patch would like to support .SAT_SUB when one of the op is IMM = 1 of form1. Form 1: #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \ T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return IMM >= y ? IMM - y : 0;\ } Take below form 1 as example: DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 1) Before this patch: __attribute__((noinline)) uint8_t sat_u_sub_imm1_uint8_t_fmt_1 (uint8_t y) { uint8_t _1; uint8_t _3; <bb 2> [local count: 1073741824]: if (y_2(D) <= 1) goto <bb 3>; [41.00%] else goto <bb 4>; [59.00%] <bb 3> [local count: 440234144]: _3 = y_2(D) ^ 1; <bb 4> [local count: 1073741824]: # _1 = PHI <0(2), _3(3)> return _1; } After this patch: __attribute__((noinline)) uint8_t sat_u_sub_imm1_uint8_t_fmt_1 (uint8_t y) { uint8_t _1; ;; basic block 2, loop depth 0 ;;pred: ENTRY _1 = .SAT_SUB (1, y_2(D)); [tail call] return _1; ;;succ: EXIT } The below test suites are passed for this patch: 1. The rv64gcv fully regression tests. 2. The x86 bootstrap tests. 3. The x86 fully regression tests. Signed-off-by: Li Xu gcc/ChangeLog: * match.pd: Support IMM=1. Diff: --- gcc/match.pd | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 362bcac291fd..0455dfa69937 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3360,6 +3360,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) } (if (wi::eq_p (c2, max) && wi::eq_p (c0_add_1, max)) +/* The boundary condition for case 9: IMM = 1 (branch with le): + SAT_U_SUB = IMM >= Y ? (IMM - Y) : 0. */ +(match (unsigned_integer_sat_sub @0 @1) + (cond^ (le @1 integer_onep@0) (bit_xor @1 integer_onep@0) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @1 + /* Unsigned saturation sub with op_1 imm, case 10: SAT_U_SUB = X > IMM ? (X - IMM) : 0. = X >= IMM ? (X - IMM) : 0. */
[gcc r15-4537] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1.
https://gcc.gnu.org/g:adf4ece4dc48deb1d1790efe104fa0cbcc22c0b6 commit r15-4537-gadf4ece4dc48deb1d1790efe104fa0cbcc22c0b6 Author: xuli Date: Mon Oct 21 04:10:14 2024 + RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1. form 1: T __attribute__((noinline)) \ sat_u_sub_imm##IMM##_##T##_fmt_1 (T y) \ { \ return (T)IMM >= y ? (T)IMM - y : 0; \ } Passed the rv64gcv regression test. Change-Id: I8805225b445cdbbc685f4f54a4d66c7ee8f748e1 Signed-off-by: Li Xu gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_u_sub_imm-1_4.c: New test. * gcc.target/riscv/sat_u_sub_imm-2_4.c: New test. * gcc.target/riscv/sat_u_sub_imm-3_4.c: New test. * gcc.target/riscv/sat_u_sub_imm-4_2.c: New test. Diff: --- gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c | 21 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c | 22 + gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c | 23 ++ gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c | 20 +++ 4 files changed, 86 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c new file mode 100644 index ..9229f3110848 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm1_uint8_t_fmt_1: +** li\s+[atx][0-9]+,\s*1 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** andi\s+a0,\s*a0,\s*0xff +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 1) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c new file mode 100644 
index ..db3294838901 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm1_uint16_t_fmt_1: +** li\s+[atx][0-9]+,\s*1 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** slli\s+a0,\s*a0,\s*48 +** srli\s+a0,\s*a0,\s*48 +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 1) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c new file mode 100644 index ..8073ee927fc4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm1_uint32_t_fmt_1: +** li\s+[atx][0-9]+,\s*1 +** slli\s+a0,\s*a0,\s*32 +** srli\s+a0,\s*a0,\s*32 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** sext\.w\s+a0,\s*a0 +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 1) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c new file mode 100644 index ..9a1ec6edf657 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { 
dg-skip-if "" { *-*-* } { "-flto" } } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "sat_arith.h" + +/* +** sat_u_sub_imm1_uint64_t_fmt_1: +** li\s+[atx][0-9]+,\s*1 +** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0 +** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ +** addi\s+a0,\s*a0,\s*-1 +** and\s+a0,\s*a0,\s*[atx][0-9]+ +** ret +*/ + +DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1) + +/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
[gcc r14-10819] libstdc++/ranges: Implement various small LWG issues
https://gcc.gnu.org/g:07ee6874963d2f8a787ba48341a5392ee8b6ba56 commit r14-10819-g07ee6874963d2f8a787ba48341a5392ee8b6ba56 Author: Patrick Palka Date: Fri Oct 4 10:01:39 2024 -0400 libstdc++/ranges: Implement various small LWG issues This implements the following small LWG issues: 3848. adjacent_view, adjacent_transform_view and slide_view missing base accessor 3851. chunk_view::inner-iterator missing custom iter_move and iter_swap 3947. Unexpected constraints on adjacent_transform_view::base() 4001. iota_view should provide empty 4012. common_view::begin/end are missing the simple-view check 4013. lazy_split_view::outer-iterator::value_type should not provide default constructor 4035. single_view should provide empty 4053. Unary call to std::views::repeat does not decay the argument 4054. Repeating a repeat_view should repeat the view libstdc++-v3/ChangeLog: * include/std/ranges (single_view::empty): Define as per LWG 4035. (iota_view::empty): Define as per LWG 4001. (lazy_split_view::_OuterIter::value_type): Remove default constructor and make other constructor private as per LWG 4013. (common_view::begin): Disable non-const overload for simple views as per LWG 4012. (common_view::end): Likewise. (adjacent_view::base): Define as per LWG 3848. (adjacent_transform_view::base): Likewise. (chunk_view::_InnerIter::iter_move): Define as per LWG 3851. (chunk_view::_InnerIter::iter_swap): Likewise. (slide_view::base): Define as per LWG 3848. (repeat_view): Adjust deduction guide as per LWG 4053. (_Repeat::operator()): Adjust single-parameter overload as per LWG 4054. * testsuite/std/ranges/adaptors/adjacent/1.cc: Verify existence of base member function. * testsuite/std/ranges/adaptors/adjacent_transform/1.cc: Likewise. * testsuite/std/ranges/adaptors/chunk/1.cc: Test LWG 3851 example. * testsuite/std/ranges/adaptors/slide/1.cc: Verify existence of base member function. * testsuite/std/ranges/iota/iota_view.cc: Test LWG 4001 example.
* testsuite/std/ranges/repeat/1.cc: Test LWG 4053/4054 examples. Reviewed-by: Jonathan Wakely (cherry picked from commit 20165d0107abd0f839f2519818b904f029f4ae55) Diff: --- libstdc++-v3/include/std/ranges| 84 +++--- .../testsuite/std/ranges/adaptors/adjacent/1.cc| 3 + .../std/ranges/adaptors/adjacent_transform/1.cc| 3 + .../testsuite/std/ranges/adaptors/chunk/1.cc | 15 .../testsuite/std/ranges/adaptors/slide/1.cc | 3 + .../testsuite/std/ranges/iota/iota_view.cc | 12 libstdc++-v3/testsuite/std/ranges/repeat/1.cc | 23 ++ 7 files changed, 135 insertions(+), 8 deletions(-) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 2c8a8535d396..c94463c83e53 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -331,6 +331,12 @@ namespace ranges end() const noexcept { return data() + 1; } + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 4035. single_view should provide empty + static constexpr bool + empty() noexcept + { return false; } + static constexpr size_t size() noexcept { return 1; } @@ -691,6 +697,12 @@ namespace ranges end() const requires same_as<_Winc, _Bound> { return _Iterator{_M_bound}; } + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 4001. iota_view should provide empty + constexpr bool + empty() const + { return _M_value == _M_bound; } + constexpr auto size() const requires (same_as<_Winc, _Bound> && __detail::__advanceable<_Winc>) @@ -3350,14 +3362,17 @@ namespace views::__adaptor private: _OuterIter _M_i = _OuterIter(); - public: - value_type() = default; - + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 4013. lazy_split_view::outer-iterator::value_type should not + // provide default constructor constexpr explicit value_type(_OuterIter __i) : _M_i(std::move(__i)) { } + friend _OuterIter; + + public: constexpr _InnerIter<_Const> begin() const { return _InnerIter<_Const>{_M_i}; } @@ -3949,8 +3964,10 @@ namespace views::__adaptor base() && { return std::move(_M_base); } + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 4012. 
common_view::begin/end are missing the simple-view check constexpr auto - begin() + begin() requires (!__detail::__simple_view<_Vp>) { if constexpr (random_access_range<_Vp> && sized_range<_Vp>) ret
[gcc r14-10816] libstdc++: Implement P2609R3 changes to the indirect invocability concepts
https://gcc.gnu.org/g:3795ac860bc6f24d0ef222045dff7b2a6350a8c4 commit r14-10816-g3795ac860bc6f24d0ef222045dff7b2a6350a8c4 Author: Patrick Palka Date: Thu Aug 22 09:24:11 2024 -0400 libstdc++: Implement P2609R3 changes to the indirect invocability concepts This implements the changes of this C++23 paper as a DR against C++20. Note that after the later P2538R1 "ADL-proof std::projected" (which we already implement), we can't use a simple partial specialization to match specializations of the 'projected' alias template. So instead we identify such specializations using a pair of distinguishing member aliases. libstdc++-v3/ChangeLog: * include/bits/iterator_concepts.h (__detail::__indirect_value): Define. (__indirect_value_t): Define as per P2609R3. (iter_common_reference_t): Adjust as per P2609R3. (indirectly_unary_invocable): Likewise. (indirectly_regular_unary_invocable): Likewise. (indirect_unary_predicate): Likewise. (indirect_binary_predicate): Likewise. (indirect_equivalence_relation): Likewise. (indirect_strict_weak_order): Likewise. (__detail::__projected::__type): Define member aliases __projected_Iter and __projected_Proj providing the template arguments of the current specialization. * include/bits/version.def (ranges): Update value. * include/bits/version.h: Regenerate. * testsuite/24_iterators/indirect_callable/p2609r3.cc: New test. * testsuite/std/ranges/version_c++23.cc: Update expected value of __cpp_lib_ranges macro. 
Reviewed-by: Jonathan Wakely (cherry picked from commit b552730faf36f1eae1dc6e73ccc93a016dec5401) Diff: --- libstdc++-v3/include/bits/iterator_concepts.h | 61 -- libstdc++-v3/include/bits/version.def | 2 +- libstdc++-v3/include/bits/version.h| 4 +- .../24_iterators/indirect_callable/p2609r3.cc | 27 ++ libstdc++-v3/testsuite/std/ranges/version_c++23.cc | 2 +- 5 files changed, 77 insertions(+), 19 deletions(-) diff --git a/libstdc++-v3/include/bits/iterator_concepts.h b/libstdc++-v3/include/bits/iterator_concepts.h index ce0b8a10f88f..9306b7bd194c 100644 --- a/libstdc++-v3/include/bits/iterator_concepts.h +++ b/libstdc++-v3/include/bits/iterator_concepts.h @@ -552,9 +552,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION concept indirectly_readable = __detail::__indirectly_readable_impl>; + namespace __detail + { +template + struct __indirect_value + { using type = iter_value_t<_Tp>&; }; + +// __indirect_value> is defined later. + } // namespace __detail + + template +using __indirect_value_t = typename __detail::__indirect_value<_Tp>::type; + template using iter_common_reference_t - = common_reference_t, iter_value_t<_Tp>&>; + = common_reference_t, __indirect_value_t<_Tp>>; /// Requirements for writing a value into an iterator's referenced object. 
template @@ -710,24 +722,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template concept indirectly_unary_invocable = indirectly_readable<_Iter> - && copy_constructible<_Fn> && invocable<_Fn&, iter_value_t<_Iter>&> + && copy_constructible<_Fn> && invocable<_Fn&, __indirect_value_t<_Iter>> && invocable<_Fn&, iter_reference_t<_Iter>> && invocable<_Fn&, iter_common_reference_t<_Iter>> - && common_reference_with&>, + && common_reference_with>, invoke_result_t<_Fn&, iter_reference_t<_Iter>>>; template concept indirectly_regular_unary_invocable = indirectly_readable<_Iter> && copy_constructible<_Fn> - && regular_invocable<_Fn&, iter_value_t<_Iter>&> + && regular_invocable<_Fn&, __indirect_value_t<_Iter>> && regular_invocable<_Fn&, iter_reference_t<_Iter>> && regular_invocable<_Fn&, iter_common_reference_t<_Iter>> - && common_reference_with&>, + && common_reference_with>, invoke_result_t<_Fn&, iter_reference_t<_Iter>>>; template concept indirect_unary_predicate = indirectly_readable<_Iter> - && copy_constructible<_Fn> && predicate<_Fn&, iter_value_t<_Iter>&> + && copy_constructible<_Fn> && predicate<_Fn&, __indirect_value_t<_Iter>> && predicate<_Fn&, iter_reference_t<_Iter>> && predicate<_Fn&, iter_common_reference_t<_Iter>>; @@ -735,9 +747,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION concept indirect_binary_predicate = indirectly_readable<_I1> && indirectly_readable<_I2> && copy_constructible<_Fn> - && predicate<_Fn&, iter_value_t<_I1>&, iter_value_t<_I2>&> - && predicate<_Fn&, iter_value_t<_I1>&, iter_reference_t<_I2>> - && predicate<_Fn&, iter_reference_t<_I1>, iter_value_t<_I2>&> + && predicate<_Fn&, __indirect_value_t
[gcc r14-10817] libstdc++: Implement P2997R1 changes to the indirect invocability concepts
https://gcc.gnu.org/g:0b2f2a7e126cf8017626793446110aac892b00f6 commit r14-10817-g0b2f2a7e126cf8017626793446110aac892b00f6 Author: Patrick Palka Date: Thu Aug 22 09:24:20 2024 -0400 libstdc++: Implement P2997R1 changes to the indirect invocability concepts This implements the changes of this C++26 paper as a DR against C++20. In passing this patch removes the std/ranges/version_c++23.cc test which is now mostly obsolete after the version.def FTM refactoring, and instead expands the __cpp_lib_ranges checks in another test so that it verifies the exact value of the FTM on a per language version basis. libstdc++-v3/ChangeLog: * include/bits/iterator_concepts.h (indirectly_unary_invocable): Relax as per P2997R1. (indirectly_regular_unary_invocable): Likewise. (indirect_unary_predicate): Likewise. (indirect_binary_predicate): Likewise. (indirect_equivalence_relation): Likewise. (indirect_strict_weak_order): Likewise. * include/bits/version.def (ranges): Update value for C++26. * include/bits/version.h: Regenerate. * testsuite/24_iterators/indirect_callable/p2997r1.cc: New test. * testsuite/std/ranges/version_c++23.cc: Remove. * testsuite/std/ranges/headers/ranges/synopsis.cc: Refine the __cpp_lib_ranges checks. 
Reviewed-by: Jonathan Wakely (cherry picked from commit 620232426bd83a79c81cd2be6f485834c618e920) Diff: --- libstdc++-v3/include/bits/iterator_concepts.h | 17 ++ libstdc++-v3/include/bits/version.def | 5 ++ libstdc++-v3/include/bits/version.h| 7 ++- .../24_iterators/indirect_callable/p2997r1.cc | 37 .../std/ranges/headers/ranges/synopsis.cc | 6 +- libstdc++-v3/testsuite/std/ranges/version_c++23.cc | 70 -- 6 files changed, 57 insertions(+), 85 deletions(-) diff --git a/libstdc++-v3/include/bits/iterator_concepts.h b/libstdc++-v3/include/bits/iterator_concepts.h index 9306b7bd194c..d849ddc32fc2 100644 --- a/libstdc++-v3/include/bits/iterator_concepts.h +++ b/libstdc++-v3/include/bits/iterator_concepts.h @@ -724,7 +724,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION concept indirectly_unary_invocable = indirectly_readable<_Iter> && copy_constructible<_Fn> && invocable<_Fn&, __indirect_value_t<_Iter>> && invocable<_Fn&, iter_reference_t<_Iter>> - && invocable<_Fn&, iter_common_reference_t<_Iter>> && common_reference_with>, invoke_result_t<_Fn&, iter_reference_t<_Iter>>>; @@ -733,15 +732,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && copy_constructible<_Fn> && regular_invocable<_Fn&, __indirect_value_t<_Iter>> && regular_invocable<_Fn&, iter_reference_t<_Iter>> - && regular_invocable<_Fn&, iter_common_reference_t<_Iter>> && common_reference_with>, invoke_result_t<_Fn&, iter_reference_t<_Iter>>>; template concept indirect_unary_predicate = indirectly_readable<_Iter> && copy_constructible<_Fn> && predicate<_Fn&, __indirect_value_t<_Iter>> - && predicate<_Fn&, iter_reference_t<_Iter>> - && predicate<_Fn&, iter_common_reference_t<_Iter>>; + && predicate<_Fn&, iter_reference_t<_Iter>>; template concept indirect_binary_predicate @@ -750,9 +747,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && predicate<_Fn&, __indirect_value_t<_I1>, __indirect_value_t<_I2>> && predicate<_Fn&, __indirect_value_t<_I1>, iter_reference_t<_I2>> && predicate<_Fn&, iter_reference_t<_I1>, __indirect_value_t<_I2>> - && 
predicate<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>> - && predicate<_Fn&, iter_common_reference_t<_I1>, - iter_common_reference_t<_I2>>; + && predicate<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>>; template concept indirect_equivalence_relation @@ -762,9 +757,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && equivalence_relation<_Fn&, __indirect_value_t<_I1>, iter_reference_t<_I2>> && equivalence_relation<_Fn&, iter_reference_t<_I1>, __indirect_value_t<_I2>> && equivalence_relation<_Fn&, iter_reference_t<_I1>, - iter_reference_t<_I2>> - && equivalence_relation<_Fn&, iter_common_reference_t<_I1>, - iter_common_reference_t<_I2>>; + iter_reference_t<_I2>>; template concept indirect_strict_weak_order @@ -773,9 +766,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && strict_weak_order<_Fn&, __indirect_value_t<_I1>, __indirect_value_t<_I2>> && strict_weak_order<_Fn&, __indirect_value_t<_I1>, iter_reference_t<_I2>> && strict_weak_order<_Fn&, iter_reference_t<_I1>, __indirect_value_t<_I2>> - && strict_weak_order<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>> -
[gcc r14-10818] libstdc++: Add some missing ranges feature-test macro tests
https://gcc.gnu.org/g:be56fee60a62014709605af19a84a48b7aa0835a commit r14-10818-gbe56fee60a62014709605af19a84a48b7aa0835a Author: Patrick Palka Date: Thu Aug 22 11:25:10 2024 -0400 libstdc++: Add some missing ranges feature-test macro tests libstdc++-v3/ChangeLog: * testsuite/25_algorithms/contains/1.cc: Verify value of __cpp_lib_ranges_contains. * testsuite/25_algorithms/find_last/1.cc: Verify value of __cpp_lib_ranges_find_last. * testsuite/25_algorithms/iota/1.cc: Verify value of __cpp_lib_ranges_iota. Reviewed-by: Jonathan Wakely (cherry picked from commit 8e0da56f18b3678beee9d2bae27e08a0e122573a) Diff: --- libstdc++-v3/testsuite/25_algorithms/contains/1.cc | 4 libstdc++-v3/testsuite/25_algorithms/find_last/1.cc | 4 libstdc++-v3/testsuite/25_algorithms/iota/1.cc | 5 + 3 files changed, 13 insertions(+) diff --git a/libstdc++-v3/testsuite/25_algorithms/contains/1.cc b/libstdc++-v3/testsuite/25_algorithms/contains/1.cc index 7d3fa048ef61..b44c06032e8a 100644 --- a/libstdc++-v3/testsuite/25_algorithms/contains/1.cc +++ b/libstdc++-v3/testsuite/25_algorithms/contains/1.cc @@ -4,6 +4,10 @@ #include #include +#if __cpp_lib_ranges_contains != 202207L +# error "Feature-test macro __cpp_lib_ranges_contains has wrong value in " +#endif + namespace ranges = std::ranges; void diff --git a/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc b/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc index 911e22887d1d..8a40bb1a6b36 100644 --- a/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc +++ b/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc @@ -4,6 +4,10 @@ #include #include +#if __cpp_lib_ranges_find_last != 202207L +# error "Feature-test macro __cpp_lib_ranges_find_last has wrong value in " +#endif + namespace ranges = std::ranges; constexpr bool diff --git a/libstdc++-v3/testsuite/25_algorithms/iota/1.cc b/libstdc++-v3/testsuite/25_algorithms/iota/1.cc index 61bf418b4dae..ebadeee79a13 100644 --- a/libstdc++-v3/testsuite/25_algorithms/iota/1.cc +++ 
b/libstdc++-v3/testsuite/25_algorithms/iota/1.cc @@ -1,9 +1,14 @@ // { dg-do run { target c++23 } } #include +#include #include #include +#if __cpp_lib_ranges_iota != 202202L +# error "Feature-test macro __cpp_lib_ranges_iota has wrong value in " +#endif + namespace ranges = std::ranges; void
[gcc r14-10820] libstdc++: Implement LWG 3664 changes to ranges::distance
https://gcc.gnu.org/g:f381a217e9b6c8276bb580a22d12445ed7a7dc8c commit r14-10820-gf381a217e9b6c8276bb580a22d12445ed7a7dc8c Author: Patrick Palka Date: Sat Oct 5 13:48:06 2024 -0400 libstdc++: Implement LWG 3664 changes to ranges::distance libstdc++-v3/ChangeLog: * include/bits/ranges_base.h (__distance_fn::operator()): Adjust iterator/sentinel overloads as per LWG 3664. * testsuite/24_iterators/range_operations/distance.cc: Test LWG 3664 example. Reviewed-by: Jonathan Wakely (cherry picked from commit 7c0d1e9f2a2f1d41d9eb755c36c871d92638c4b7) Diff: --- libstdc++-v3/include/bits/ranges_base.h| 14 +++--- .../testsuite/24_iterators/range_operations/distance.cc| 11 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/libstdc++-v3/include/bits/ranges_base.h b/libstdc++-v3/include/bits/ranges_base.h index 23c0b56ff225..67ac8db8b469 100644 --- a/libstdc++-v3/include/bits/ranges_base.h +++ b/libstdc++-v3/include/bits/ranges_base.h @@ -930,7 +930,9 @@ namespace ranges struct __distance_fn final { -template _Sent> +// _GLIBCXX_RESOLVE_LIB_DEFECTS +// 3664. 
LWG 3392 broke std::ranges::distance(a, a+3) +template _Sent> requires (!sized_sentinel_for<_Sent, _It>) constexpr iter_difference_t<_It> operator()[[nodiscard]](_It __first, _Sent __last) const @@ -944,13 +946,11 @@ namespace ranges return __n; } -template _Sent> +template> _Sent> [[nodiscard]] - constexpr iter_difference_t<_It> - operator()(const _It& __first, const _Sent& __last) const - { - return __last - __first; - } + constexpr iter_difference_t> + operator()(_It&& __first, _Sent __last) const + { return __last - static_cast&>(__first); } template [[nodiscard]] diff --git a/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc b/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc index 9a1d0c3efe83..336956936c22 100644 --- a/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc +++ b/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc @@ -144,6 +144,16 @@ test05() VERIFY( std::ranges::distance(c4) == 5 ); } +void +test06() +{ + // LWG 3664 - LWG 3392 broke std::ranges::distance(a, a+3) + int a[] = {1, 2, 3}; + VERIFY( std::ranges::distance(a, a+3) == 3 ); + VERIFY( std::ranges::distance(a, a) == 0 ); + VERIFY( std::ranges::distance(a+3, a) == -3 ); +} + int main() { @@ -152,4 +162,5 @@ main() test03(); test04(); test05(); + test06(); }
[gcc r13-9142] [GCC13/GCC12] Fix testcase.
https://gcc.gnu.org/g:8b43518a01cbbbafe042b85a48fa09a32948380a commit r13-9142-g8b43518a01cbbbafe042b85a48fa09a32948380a Author: liuhongt Date: Tue Oct 22 11:24:23 2024 +0800 [GCC13/GCC12] Fix testcase. The optimization relies on other patterns which are only available at GCC14 and above, so restore the xfail for GCC13/12 branch. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-pr103750-2.c: Add xfail for ia32. Diff: --- gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c index 3392e193222a..7303f5403ba8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c @@ -1,7 +1,8 @@ /* PR target/103750 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ -/* { dg-final { scan-assembler-not "kmov" } } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ #include extern __m128i* pi128;
[gcc r12-10781] [GCC13/GCC12] Fix testcase.
https://gcc.gnu.org/g:45bde60836d04cce4637b74ecadbb0aff90b832f commit r12-10781-g45bde60836d04cce4637b74ecadbb0aff90b832f Author: liuhongt Date: Tue Oct 22 11:24:23 2024 +0800 [GCC13/GCC12] Fix testcase. The optimization relies on other patterns which are only available at GCC14 and above, so restore the xfail for GCC13/12 branch. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-pr103750-2.c: Add xfail for ia32. (cherry picked from commit 8b43518a01cbbbafe042b85a48fa09a32948380a) Diff: --- gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c index 3392e193222a..7303f5403ba8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c @@ -1,7 +1,8 @@ /* PR target/103750 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ -/* { dg-final { scan-assembler-not "kmov" } } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ #include extern __m128i* pi128;
[gcc(refs/users/omachota/heads/rtl-ssa-dce)] rtl-ssa: dce fix working with sbitmap
https://gcc.gnu.org/g:d0095cfa468ae39a6b0c2e44951b2772f734a33a commit d0095cfa468ae39a6b0c2e44951b2772f734a33a Author: Ondřej Machota Date: Tue Oct 22 08:40:34 2024 +0200 rtl-ssa: dce fix working with sbitmap Diff: --- gcc/dce.cc | 107 - 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/gcc/dce.cc b/gcc/dce.cc index 716236d79c1b..929cb259e6d6 100644 --- a/gcc/dce.cc +++ b/gcc/dce.cc @@ -1243,15 +1243,24 @@ bool is_inherently_live(insn_info *insn) } static void -rtl_ssa_dce_init() +rtl_ssa_dce_init(sbitmap &marked_rtx) { calculate_dominance_info(CDI_DOMINATORS); crtl->ssa = new rtl_ssa::function_info(cfun); + + marked_rtx = sbitmap_alloc(get_max_uid() + 1); + bitmap_clear(marked_rtx); + if (dump_file) +fprintf(dump_file, "Allocated `marked_rtx` with size: %d\n", get_max_uid() + 1); } static void -rtl_ssa_dce_done() +rtl_ssa_dce_done(sbitmap marked_rtx) { + sbitmap_free(marked_rtx); + if (dump_file) +fprintf(dump_file, "Freed `marked_rtx`\n"); + free_dominance_info(CDI_DOMINATORS); if (crtl->ssa->perform_pending_updates()) cleanup_cfg(0); @@ -1264,23 +1273,33 @@ rtl_ssa_dce_done() } static void -rtl_ssa_dce_mark_live(insn_info *info, auto_vec worklist, sbitmap marked) { +rtl_ssa_dce_mark_live(insn_info *info, vec &worklist, sbitmap marked_rtx) +{ int info_uid = info->uid(); - bitmap_set_bit(marked, info_uid); - if (dump_file) { + if (dump_file) + { fprintf(dump_file, " Adding insn %d to worklist\n", info_uid); } + if (info_uid < 0) + { + return; + } + bitmap_set_bit(marked_rtx, info_uid); worklist.safe_push(info); } static void -rtl_ssa_dce_mark(sbitmap marked) +rtl_ssa_dce_mark(sbitmap marked_rtx) { insn_info *next; auto_vec worklist; for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next) { +if (dump_file) +{ + fprintf(dump_file, "Insn: %d\n", insn->uid()); +} next = insn->next_any_insn(); /* I would like to mark visited instruction with something like plf (Pass local flags) as in gimple @@ -1288,22 +1307,18 @@ rtl_ssa_dce_mark(sbitmap 
marked) This file contains some useful functions: e.g. marked_insn_p, mark_insn mark_insn does much more than I want now... It does quite a useful job. If rtl_insn is a call and it is obsolete, it will find call arguments. -*/ -// insn.defs() // UD chain - this is what I want - reach the ancestors\ - // insn.uses() // DU chain -/* +insn.defs() // UD chain - this is what I want - reach the ancestors\ +insn.uses() // DU chain + + * For marking phi nodes, which don't have uid (insn->rtl() is null) by definition, use a dictionary and store their addresses * Is seems, that insn->uid() is uniq enough */ if (is_inherently_live(insn)) { - if (dump_file) -fprintf(dump_file, " Adding insn %d to worklist\n", insn->uid()); - rtl_ssa_dce_mark_live(insn, marked); - worklist.safe_push(insn); - bitmap_set_bit(marked, insn->uid()); + rtl_ssa_dce_mark_live(insn, worklist, marked_rtx); } // if (insn->can_be_optimized () || insn->is_debug_insn ()) @@ -1311,56 +1326,88 @@ rtl_ssa_dce_mark(sbitmap marked) // worklist.safe_push (insn); } + if (dump_file) +fprintf(dump_file, "Finished inherently live, marking parents\n"); while (!worklist.is_empty()) { +if (dump_file) + fprintf(dump_file, "Brruuh; "); insn_info *insn = worklist.pop(); def_array defs = insn->defs(); // array - because of phi? 
+if (dump_file) + fprintf(dump_file, "Looking at: %d, defs: %d\n", insn->uid(), defs.size()); for (size_t i = 0; i < defs.size(); i++) { - insn_info *parent_insn = defs[i]->insn(); - int parent_insn_uid = parent_insn->uid(); - if (!bitmap_bit_p(marked, parent_insn_uid)) + if (parent_insn_uid < 0) + { +continue; + } + if (dump_file) +fprintf(dump_file, "Trying to add: %d\n", parent_insn_uid); + if (!bitmap_bit_p(marked_rtx, parent_insn_uid)) { if (dump_file) - fprintf(dump_file, " Adding insn %d to worklist\n", parent_insn_uid); + fprintf(dump_file, " Adding insn %d to worklist - mark\n", parent_insn_uid); worklist.safe_push(parent_insn); -bitmap_set_bit(marked, parent_insn_uid); +if (parent_insn_uid >= 0) + bitmap_set_bit(marked_rtx, parent_insn_uid); } } } } static void -rtl_ssa_dce_sweep(sbitmap marked) +rtl_ssa_dce_sweep(sbitmap marked_rtx) { insn_info *next; + auto_vec to_delete; for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next) { -if (!bitmap_bit_p(marked, insn->uid())) { +if (dump_file) +{ + fprintf(dump_file, "Insn: %d\n", insn->uid()); +} +next = insn->next_any_insn(); +if (dump_file) +{ + fprintf
[gcc r15-4538] testsuite: Fix typo in ext-floating19.C
https://gcc.gnu.org/g:9263523b7e522e5b8c9ac70df5efc73632c19380 commit r15-4538-g9263523b7e522e5b8c9ac70df5efc73632c19380 Author: Stefan Schulze Frielinghaus Date: Tue Oct 22 08:58:14 2024 +0200 testsuite: Fix typo in ext-floating19.C gcc/testsuite/ChangeLog: * g++.dg/cpp23/ext-floating19.C: Fix typo for bfloat16 guard. Diff: --- gcc/testsuite/g++.dg/cpp23/ext-floating19.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/cpp23/ext-floating19.C b/gcc/testsuite/g++.dg/cpp23/ext-floating19.C index dfbedb986990..a79f7d6e202f 100644 --- a/gcc/testsuite/g++.dg/cpp23/ext-floating19.C +++ b/gcc/testsuite/g++.dg/cpp23/ext-floating19.C @@ -15,6 +15,6 @@ auto x64 = 3.14f64; #ifdef __STDCPP_FLOAT128_T__ auto x128 = 3.14f128; #endif -#ifdef __STDCPP_FLOAT16_T__ +#ifdef __STDCPP_BFLOAT16_T__ auto xbf = 1.2bf16; #endif