[gcc r14-9589] tree-optimization/113727 - bogus SRA with BIT_FIELD_REF
https://gcc.gnu.org/g:9d6ff6f1ea2ae7fc32ec9fbd0554fb06238ed045 commit r14-9589-g9d6ff6f1ea2ae7fc32ec9fbd0554fb06238ed045 Author: Richard Biener Date: Tue Mar 19 14:50:06 2024 +0100 tree-optimization/113727 - bogus SRA with BIT_FIELD_REF When SRA analyzes BIT_FIELD_REFs it handles writes and non-byte-aligned reads differently from byte-aligned reads. Instead of trying to create replacements for the loaded portion the former cases try to replace the base object while keeping the wrapping BIT_FIELD_REFs. This breaks when we have both kinds operating on the same base object if there's no apparent overlap conflict, as the conflict that then nevertheless exists isn't dealt with. The fix is to enforce what I think is part of the design handling the former case - that only the full base object gets replaced and no further sub-objects are created within as otherwise keeping the wrapping BIT_FIELD_REF cannot work. The patch enforces this within analyze_access_subtree. PR tree-optimization/113727 * tree-sra.cc (analyze_access_subtree): Do not allow replacements in subtrees when grp_partial_lhs. * gcc.dg/torture/pr113727.c: New testcase. 
Diff: --- gcc/testsuite/gcc.dg/torture/pr113727.c | 26 ++ gcc/tree-sra.cc | 3 ++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr113727.c b/gcc/testsuite/gcc.dg/torture/pr113727.c new file mode 100644 index 000..f92ddad5c8e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr113727.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32plus } */ + +struct f { + unsigned au : 5; + unsigned f3 : 21; +} g_994; + +int main() +{ + struct f aq1 = {}; +{ + struct f aq = {9, 5}; + struct f as = aq; + for (int y = 0 ; y <= 4; y += 1) + if (as.au) + { + struct f aa[5] = {{2, 154}, {2, 154}, {2, 154}, {2, 154}, {2, 154}}; + as = aa[0]; + } + aq1 = as; +} + if (aq1.f3 != 0x9a) +__builtin_abort(); + return 0; +} diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc index f8e71ec48b9..dbfae5e7fdd 100644 --- a/gcc/tree-sra.cc +++ b/gcc/tree-sra.cc @@ -2735,7 +2735,8 @@ analyze_access_subtree (struct access *root, struct access *parent, { hole |= covered_to < child->offset; sth_created |= analyze_access_subtree (child, root, -allow_replacements && !scalar, +allow_replacements && !scalar +&& !root->grp_partial_lhs, totally); root->grp_unscalarized_data |= child->grp_unscalarized_data;
[gcc r14-9590] tree-optimization/111736 - avoid address sanitizing of __seg_gs
https://gcc.gnu.org/g:134ef2a8cac1a5cc718739bd7d3b3472947c80d6 commit r14-9590-g134ef2a8cac1a5cc718739bd7d3b3472947c80d6 Author: Richard Biener Date: Thu Mar 21 08:30:39 2024 +0100 tree-optimization/111736 - avoid address sanitizing of __seg_gs The following more thoroughly avoids address sanitizing accesses to non-generic address-spaces. PR tree-optimization/111736 * asan.cc (instrument_derefs): Do not instrument accesses to non-generic address-spaces. * gcc.target/i386/pr111736.c: New testcase. Diff: --- gcc/asan.cc | 4 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++ 2 files changed, 27 insertions(+) diff --git a/gcc/asan.cc b/gcc/asan.cc index cfe83106460..7f91cc616fc 100644 --- a/gcc/asan.cc +++ b/gcc/asan.cc @@ -2755,6 +2755,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, if (VAR_P (inner) && DECL_HARD_REGISTER (inner)) return; + /* Accesses to non-generic address-spaces should not be instrumented. */ + if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner +return; + poly_int64 decl_size; if ((VAR_P (inner) || (TREE_CODE (inner) == RESULT_DECL diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c b/gcc/testsuite/gcc.target/i386/pr111736.c new file mode 100644 index 000..231fdd07e80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111736.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsanitize=address" } */ + +int __seg_gs m; + +int foo (void) +{ + return m; +} + +extern int __seg_gs n; + +int bar (void) +{ + return n; +} + +int baz (int __seg_gs *o) +{ + return *o; +} + +/* { dg-final { scan-assembler-not "asan_report_load" } } */
[gcc r13-8474] rs6000: Don't ICE when compiling the __builtin_vsx_splat_2di [PR113950]
https://gcc.gnu.org/g:27eb6e81e6e578da9f9947d3f96c0fa58971fe7f commit r13-8474-g27eb6e81e6e578da9f9947d3f96c0fa58971fe7f Author: Jeevitha Date: Wed Mar 20 23:34:46 2024 -0500 rs6000: Don't ICE when compiling the __builtin_vsx_splat_2di [PR113950] When we expand the __builtin_vsx_splat_2di built-in, we were allowing immediate value for second operand which causes an unrecognizable insn ICE. Even though the immediate value was forced into a register, it wasn't correctly assigned to the second operand. So corrected the assignment of op1 to operands[1]. 2024-03-07 Jeevitha Palanisamy gcc/ PR target/113950 * config/rs6000/vsx.md (vsx_splat_): Correct assignment to operand1 and simplify else if with else. gcc/testsuite/ PR target/113950 * gcc.target/powerpc/pr113950.c: New testcase. (cherry picked from commit fa0468877869f52b05742de6deef582e4dd296fc) Diff: --- gcc/config/rs6000/vsx.md| 4 ++-- gcc/testsuite/gcc.target/powerpc/pr113950.c | 24 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 3506913bd02..f70d69ee4b9 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4551,8 +4551,8 @@ rtx op1 = operands[1]; if (MEM_P (op1)) operands[1] = rs6000_force_indexed_or_indirect_mem (op1); - else if (!REG_P (op1)) -op1 = force_reg (mode, op1); + else +operands[1] = force_reg (mode, op1); }) (define_insn "vsx_splat__reg" diff --git a/gcc/testsuite/gcc.target/powerpc/pr113950.c b/gcc/testsuite/gcc.target/powerpc/pr113950.c new file mode 100644 index 000..359963d1041 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr113950.c @@ -0,0 +1,24 @@ +/* PR target/113950 */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ + +/* Verify we do not ICE on the following. 
*/ + +void abort (void); + +int main () +{ + int i; + vector signed long long vsll_result, vsll_expected_result; + signed long long sll_arg1; + + sll_arg1 = 300; + vsll_expected_result = (vector signed long long) {300, 300}; + vsll_result = __builtin_vsx_splat_2di (sll_arg1); + + for (i = 0; i < 2; i++) +if (vsll_result[i] != vsll_expected_result[i]) + abort(); + + return 0; +}
[gcc r14-9591] Fix runtime error for nonlinear iv vectorization(step_mult).
https://gcc.gnu.org/g:ac2f8c2a367151fc0410f904339c475a953cffc8 commit r14-9591-gac2f8c2a367151fc0410f904339c475a953cffc8 Author: liuhongt Date: Thu Mar 21 13:15:23 2024 +0800 Fix runtime error for nonlinear iv vectorization(step_mult). wi::from_mpz doesn't take a sign argument, we want it to be wrapped instead of saturation, so pass utype and true to it, and it fixes the bug. gcc/ChangeLog: PR tree-optimization/114396 * tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Pass utype and true to wi::from_mpz. gcc/testsuite/ChangeLog: * gcc.target/i386/pr114396.c: New test. Diff: --- gcc/testsuite/gcc.target/i386/pr114396.c | 105 +++ gcc/tree-vect-loop.cc| 2 +- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c b/gcc/testsuite/gcc.target/i386/pr114396.c new file mode 100644 index 000..4c4015f871f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr114396.c @@ -0,0 +1,105 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */ + +short a = 0xF; +short b[16]; +unsigned short ua = 0xF; +unsigned short ub[16]; + +short +__attribute__((noipa)) +foo (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= 5; + return a; +} + +short +__attribute__((noipa)) +foo1 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa)) +foou (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa)) +foou1 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= 5; + return a; +} + +short +__attribute__((noipa,optimize("O3"))) +foo_o3 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= 5; + return a; +} + +short +__attribute__((noipa,optimize("O3"))) +foo1_o3 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa,optimize("O3"))) +foou_o3 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a 
*= -5; + return a; +} + +unsigned short +__attribute__((noipa,optimize("O3"))) +foou1_o3 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= 5; + return a; +} + +int main() { + unsigned short uexp, ures; + short exp, res; + exp = foo (a); + res = foo_o3 (a); + if (exp != res) +__builtin_abort (); + + exp = foo1 (a); + res = foo1_o3 (a); + if (uexp != ures) +__builtin_abort (); + + uexp = foou (a); + ures = foou_o3 (a); + if (uexp != ures) +__builtin_abort (); + + uexp = foou1 (a); + ures = foou1_o3 (a); + if (uexp != ures) +__builtin_abort (); + + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 4375ebdcb49..2921a9e6aa1 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -9454,7 +9454,7 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr, wi::to_mpz (skipn, exp, UNSIGNED); mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type)); mpz_powm (res, base, exp, mod); - begin = wi::from_mpz (type, res, TYPE_SIGN (type)); + begin = wi::from_mpz (utype, res, true); tree mult_expr = wide_int_to_tree (utype, begin); init_expr = gimple_build (stmts, MULT_EXPR, utype, init_expr, mult_expr);
[gcc r13-8475] Fix runtime error for nonlinear iv vectorization(step_mult).
https://gcc.gnu.org/g:199b021a38f30b681e0dbecd2d0296beabd50b13 commit r13-8475-g199b021a38f30b681e0dbecd2d0296beabd50b13 Author: liuhongt Date: Thu Mar 21 13:15:23 2024 +0800 Fix runtime error for nonlinear iv vectorization(step_mult). wi::from_mpz doesn't take a sign argument, we want it to be wrapped instead of saturation, so pass utype and true to it, and it fixes the bug. gcc/ChangeLog: PR tree-optimization/114396 * tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Pass utype and true to wi::from_mpz. gcc/testsuite/ChangeLog: * gcc.target/i386/pr114396.c: New test. (cherry picked from commit ac2f8c2a367151fc0410f904339c475a953cffc8) Diff: --- gcc/testsuite/gcc.target/i386/pr114396.c | 105 +++ gcc/tree-vect-loop.cc| 2 +- 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c b/gcc/testsuite/gcc.target/i386/pr114396.c new file mode 100644 index 000..4c4015f871f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr114396.c @@ -0,0 +1,105 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */ + +short a = 0xF; +short b[16]; +unsigned short ua = 0xF; +unsigned short ub[16]; + +short +__attribute__((noipa)) +foo (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= 5; + return a; +} + +short +__attribute__((noipa)) +foo1 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa)) +foou (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa)) +foou1 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= 5; + return a; +} + +short +__attribute__((noipa,optimize("O3"))) +foo_o3 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= 5; + return a; +} + +short +__attribute__((noipa,optimize("O3"))) +foo1_o3 (short a) +{ + for (int e = 0; e < 9; e += 1) +b[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa,optimize("O3"))) 
+foou_o3 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= -5; + return a; +} + +unsigned short +__attribute__((noipa,optimize("O3"))) +foou1_o3 (unsigned short a) +{ + for (int e = 0; e < 9; e += 1) +ub[e] = a *= 5; + return a; +} + +int main() { + unsigned short uexp, ures; + short exp, res; + exp = foo (a); + res = foo_o3 (a); + if (exp != res) +__builtin_abort (); + + exp = foo1 (a); + res = foo1_o3 (a); + if (uexp != ures) +__builtin_abort (); + + uexp = foou (a); + ures = foou_o3 (a); + if (uexp != ures) +__builtin_abort (); + + uexp = foou1 (a); + ures = foou1_o3 (a); + if (uexp != ures) +__builtin_abort (); + + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index d08d4996771..9615161ad37 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8730,7 +8730,7 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr, wi::to_mpz (skipn, exp, UNSIGNED); mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type)); mpz_powm (res, base, exp, mod); - begin = wi::from_mpz (type, res, TYPE_SIGN (type)); + begin = wi::from_mpz (utype, res, true); tree mult_expr = wide_int_to_tree (utype, begin); init_expr = gimple_build (stmts, MULT_EXPR, utype, init_expr, mult_expr);
[gcc r13-8476] tree-optimization/111736 - avoid address sanitizing of __seg_gs
https://gcc.gnu.org/g:6d5eb47849bcf9aecefacf7d7e4767750b1ec83b commit r13-8476-g6d5eb47849bcf9aecefacf7d7e4767750b1ec83b Author: Richard Biener Date: Thu Mar 21 08:30:39 2024 +0100 tree-optimization/111736 - avoid address sanitizing of __seg_gs The following more thoroughly avoids address sanitizing accesses to non-generic address-spaces. PR tree-optimization/111736 * asan.cc (instrument_derefs): Do not instrument accesses to non-generic address-spaces. * gcc.target/i386/pr111736.c: New testcase. (cherry picked from commit 134ef2a8cac1a5cc718739bd7d3b3472947c80d6) Diff: --- gcc/asan.cc | 4 gcc/testsuite/gcc.target/i386/pr111736.c | 23 +++ 2 files changed, 27 insertions(+) diff --git a/gcc/asan.cc b/gcc/asan.cc index 15feecfb495..df732c02150 100644 --- a/gcc/asan.cc +++ b/gcc/asan.cc @@ -2752,6 +2752,10 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t, if (VAR_P (inner) && DECL_HARD_REGISTER (inner)) return; + /* Accesses to non-generic address-spaces should not be instrumented. */ + if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (inner +return; + poly_int64 decl_size; if ((VAR_P (inner) || TREE_CODE (inner) == RESULT_DECL) && offset == NULL_TREE diff --git a/gcc/testsuite/gcc.target/i386/pr111736.c b/gcc/testsuite/gcc.target/i386/pr111736.c new file mode 100644 index 000..231fdd07e80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111736.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsanitize=address" } */ + +int __seg_gs m; + +int foo (void) +{ + return m; +} + +extern int __seg_gs n; + +int bar (void) +{ + return n; +} + +int baz (int __seg_gs *o) +{ + return *o; +} + +/* { dg-final { scan-assembler-not "asan_report_load" } } */
[gcc r13-8477] debug/112718 - reset all type units with -ffat-lto-objects
https://gcc.gnu.org/g:dd1948d467dc25b9b462b173ec40b95f6aa51356 commit r13-8477-gdd1948d467dc25b9b462b173ec40b95f6aa51356 Author: Richard Biener Date: Mon Jan 22 15:42:59 2024 +0100 debug/112718 - reset all type units with -ffat-lto-objects When mixing -flto, -ffat-lto-objects and -fdebug-type-section we fail to reset all type units after early output resulting in an ICE when attempting to add then duplicate sibling attributes. PR debug/112718 * dwarf2out.cc (dwarf2out_finish): Reset all type units for the fat part of an LTO compile. * gcc.dg/debug/pr112718.c: New testcase. (cherry picked from commit 7218f5050cb7163edae331f54ca163248ab48bfa) Diff: --- gcc/dwarf2out.cc | 12 gcc/testsuite/gcc.dg/debug/pr112718.c | 12 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc index 33f5f4d5ddb..bd82b86a829 100644 --- a/gcc/dwarf2out.cc +++ b/gcc/dwarf2out.cc @@ -32191,24 +32191,12 @@ dwarf2out_finish (const char *filename) reset_dies (comp_unit_die ()); for (limbo_die_node *node = cu_die_list; node; node = node->next) reset_dies (node->die); - - hash_table comdat_type_table (100); for (ctnode = comdat_type_list; ctnode != NULL; ctnode = ctnode->next) { - comdat_type_node **slot - = comdat_type_table.find_slot (ctnode, INSERT); - - /* Don't reset types twice. */ - if (*slot != HTAB_EMPTY_ENTRY) - continue; - /* Remove the pointer to the line table. */ remove_AT (ctnode->root_die, DW_AT_stmt_list); - if (debug_info_level >= DINFO_LEVEL_TERSE) reset_dies (ctnode->root_die); - - *slot = ctnode; } /* Reset die CU symbol so we don't output it twice. 
*/ diff --git a/gcc/testsuite/gcc.dg/debug/pr112718.c b/gcc/testsuite/gcc.dg/debug/pr112718.c new file mode 100644 index 000..ff80ca5a298 --- /dev/null +++ b/gcc/testsuite/gcc.dg/debug/pr112718.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lto } */ +/* { dg-options "-g -fdebug-types-section -flto -ffat-lto-objects" } */ + +struct { + int h; + unsigned char data[20 + 24 * 6]; +} _EC_X9_62_PRIME_192V2; +struct { + int h; + unsigned char data[20 + 24 * 6]; +} _EC_X9_62_PRIME_192V3;
[gcc r13-8479] tree-optimization/114027 - conditional reduction chain
https://gcc.gnu.org/g:bd276b5340563182f7d95c383196fdd6fb7e6a1d commit r13-8479-gbd276b5340563182f7d95c383196fdd6fb7e6a1d Author: Richard Biener Date: Thu Feb 22 10:50:12 2024 +0100 tree-optimization/114027 - conditional reduction chain When we classify a conditional reduction chain as CONST_COND_REDUCTION we fail to verify all involved conditionals have the same constant. That's a quite unlikely situation so the following simply disables such classification when there's more than one reduction statement. PR tree-optimization/114027 * tree-vect-loop.cc (vecctorizable_reduction): Use optimized condition reduction classification only for single-element chains. * gcc.dg/vect/pr114027.c: New testcase. (cherry picked from commit 549f251f055e3a0b0084189a3012c4f15d635e75) Diff: --- gcc/testsuite/gcc.dg/vect/pr114027.c | 26 ++ gcc/tree-vect-loop.cc| 11 ++- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr114027.c b/gcc/testsuite/gcc.dg/vect/pr114027.c new file mode 100644 index 000..ead9cdd982d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr114027.c @@ -0,0 +1,26 @@ +#include "tree-vect.h" + +int __attribute__((noipa)) +foo (int *f, int n) +{ + int res = 0; + for (int i = 0; i < n; ++i) +{ + if (f[2*i]) +res = 2; + if (f[2*i+1]) +res = -2; +} + return res; +} + +int f[] = { 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 0 }; + +int +main () +{ + if (foo (f, 16) != 2) +__builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 9615161ad37..b4ce9535646 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7065,17 +7065,18 @@ vectorizable_reduction (loop_vec_info loop_vinfo, < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i])) vectype_in = vectype_op[i]; - if (op.code == COND_EXPR) + /* Record how the non-reduction-def value of COND_EXPR is defined. +??? For a chain of multiple CONDs we'd have to match them up all. 
*/ + if (op.code == COND_EXPR && reduc_chain_length == 1) { - /* Record how the non-reduction-def value of COND_EXPR is defined. */ if (dt == vect_constant_def) { cond_reduc_dt = dt; cond_reduc_val = op.ops[i]; } - if (dt == vect_induction_def - && def_stmt_info - && is_nonwrapping_integer_induction (def_stmt_info, loop)) + else if (dt == vect_induction_def + && def_stmt_info + && is_nonwrapping_integer_induction (def_stmt_info, loop)) { cond_reduc_dt = dt; cond_stmt_vinfo = def_stmt_info;
[gcc r13-8481] middle-end/114070 - VEC_COND_EXPR folding
https://gcc.gnu.org/g:a3ff14ac4804be400a52dcf630f0de2d57cae835 commit r13-8481-ga3ff14ac4804be400a52dcf630f0de2d57cae835 Author: Richard Biener Date: Thu Feb 29 09:22:19 2024 +0100 middle-end/114070 - VEC_COND_EXPR folding The following amends the PR114070 fix to optimistically allow the folding when we cannot expand the current vec_cond using vcond_mask and we're still before vector lowering. This leaves a small window between vectorization and lowering where we could break vec_conds that can be expanded via vcond{,u,eq}, most susceptible is the loop unrolling pass which applies VN and thus possibly folding to the unrolled body of a vectorized loop. This gets back the folding for targets that cannot do vectorization. It doesn't get back the folding for x86 with AVX512 for example since that can handle the original IL but not the folded since it misses some vcond_mask expanders. PR middle-end/114070 * match.pd ((c ? a : b) op d --> c ? (a op d) : (b op d)): Allow the folding if before lowering and the current IL isn't supported with vcond_mask. (cherry picked from commit f9c30ea737b806caac917d8f501305151a2cbd57) Diff: --- gcc/match.pd | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 18a523fe9a8..908959de3e7 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4464,7 +4464,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4)) (if (TREE_CODE_CLASS (op) != tcc_comparison || types_match (type, TREE_TYPE (@1)) - || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK) + || (optimize_vectors_before_lowering_p () + /* The following is optimistic on the side of non-support, we are + missing the legacy vcond{,u,eq} cases. Do this only when + lowering will be able to fixup.. */ + && !expand_vec_cond_expr_p (TREE_TYPE (@1), + TREE_TYPE (@0), ERROR_MARK))) (vec_cond @0 (op! @1 @3) (op! @2 @4 /* (c ? a : b) op d --> c ? 
(a op d) : (b op d) */ @@ -4472,13 +4478,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (op (vec_cond:s @0 @1 @2) @3) (if (TREE_CODE_CLASS (op) != tcc_comparison || types_match (type, TREE_TYPE (@1)) - || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK) + || (optimize_vectors_before_lowering_p () + && !expand_vec_cond_expr_p (TREE_TYPE (@1), + TREE_TYPE (@0), ERROR_MARK))) (vec_cond @0 (op! @1 @3) (op! @2 @3 (simplify (op @3 (vec_cond:s @0 @1 @2)) (if (TREE_CODE_CLASS (op) != tcc_comparison || types_match (type, TREE_TYPE (@1)) - || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK) + || (optimize_vectors_before_lowering_p () + && !expand_vec_cond_expr_p (TREE_TYPE (@1), + TREE_TYPE (@0), ERROR_MARK))) (vec_cond @0 (op! @3 @1) (op! @3 @2) #if GIMPLE
[gcc r13-8482] tree-optimization/114203 - wrong CLZ niter computation
https://gcc.gnu.org/g:a729b1227bc8c84cd91a3b8c9c9d11bc43d415de commit r13-8482-ga729b1227bc8c84cd91a3b8c9c9d11bc43d415de Author: Richard Biener Date: Mon Mar 4 10:38:31 2024 +0100 tree-optimization/114203 - wrong CLZ niter computation For precision less than int we apply the adjustment to make it defined at zero after the adjustment to make it compute CLZ rather than CTZ. That's wrong. PR tree-optimization/114203 * tree-ssa-loop-niter.cc (build_cltz_expr): Apply CTZ->CLZ adjustment before making the result defined at zero. * gcc.dg/torture/pr114203.c: New testcase. (cherry picked from commit cde50296a19b109909089b91d532d2c8455f5f10) Diff: --- gcc/testsuite/gcc.dg/torture/pr114203.c | 21 + gcc/tree-ssa-loop-niter.cc | 7 +++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr114203.c b/gcc/testsuite/gcc.dg/torture/pr114203.c new file mode 100644 index 000..0ef6279942a --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr114203.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ + +int __attribute__((noipa)) +foo (unsigned char b) +{ + int c = 0; + + while (b) { + b >>= 1; + c++; + } + + return c; +} + +int main() +{ + if (foo(0) != 0) +__builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index dc4c7a418f6..0ffe4b8465d 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2299,6 +2299,9 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero) src = fold_convert (unsigned_type_node, src); call = build_call_expr (fn, 1, src); + if (leading && prec < i_prec) + call = fold_build2 (MINUS_EXPR, integer_type_node, call, + build_int_cst (integer_type_node, i_prec - prec)); if (define_at_zero) { tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src, @@ -2306,10 +2309,6 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero) call = fold_build3 (COND_EXPR, integer_type_node, is_zero, call, build_int_cst (integer_type_node, prec)); } - - if (leading && 
prec < i_prec) - call = fold_build2 (MINUS_EXPR, integer_type_node, call, - build_int_cst (integer_type_node, i_prec - prec)); } return call;
[gcc r13-8478] tree-optimization/113910 - huge compile time during PTA
https://gcc.gnu.org/g:9a19811ea1e9b3024c0f41b074d71679088bb2d7 commit r13-8478-g9a19811ea1e9b3024c0f41b074d71679088bb2d7 Author: Richard Biener Date: Wed Feb 14 12:33:13 2024 +0100 tree-optimization/113910 - huge compile time during PTA For the testcase in PR113910 we spend a lot of time in PTA comparing bitmaps for looking up equivalence class members. This points to the very weak bitmap_hash function which effectively hashes set and a subset of not set bits. The major problem with it is that it simply truncates the BITMAP_WORD sized intermediate hash to hashval_t which is unsigned int, effectively not hashing half of the bits. This reduces the compile-time for the testcase from tens of minutes to 42 seconds and PTA time from 99% to 46%. PR tree-optimization/113910 * bitmap.cc (bitmap_hash): Mix the full element "hash" to the hashval_t hash. (cherry picked from commit ad7a365aaccecd23ea287c7faaab9c7bd50b944a) Diff: --- gcc/bitmap.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc index 20de562caac..d65f6b259dd 100644 --- a/gcc/bitmap.cc +++ b/gcc/bitmap.cc @@ -2673,7 +2673,7 @@ bitmap_hash (const_bitmap head) for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++) hash ^= ptr->bits[ix]; } - return (hashval_t)hash; + return iterative_hash (&hash, sizeof (hash), 0); }
[gcc r13-8480] middle-end/114070 - folding breaking VEC_COND expansion
https://gcc.gnu.org/g:a9a425df628ab80374cc6a132d39e470bc78c8bc commit r13-8480-ga9a425df628ab80374cc6a132d39e470bc78c8bc Author: Richard Biener Date: Fri Feb 23 16:06:05 2024 +0100 middle-end/114070 - folding breaking VEC_COND expansion The following properly guards the simplifications that move operations into VEC_CONDs, in particular when that changes the type constraints on this operation. This needed a genmatch fix which was recording spurious implicit fors when tcc_comparison is used in a C expression. PR middle-end/114070 * genmatch.cc (parser::parse_c_expr): Do not record operand lists but only mark operators used. * match.pd ((c ? a : b) op (c ? d : e) --> c ? (a op d) : (b op e)): Properly guard the case of tcc_comparison changing the VEC_COND value operand type. * gcc.dg/torture/pr114070.c: New testcase. (cherry picked from commit af66ad89e8169f44db723813662917cf4cbb78fc) Diff: --- gcc/genmatch.cc | 6 ++ gcc/match.pd| 15 --- gcc/testsuite/gcc.dg/torture/pr114070.c | 12 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index 98b429a9d0b..c1023d921fc 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -4519,10 +4519,8 @@ parser::parse_c_expr (cpp_ttype start) = (const char *)CPP_HASHNODE (token->val.node.node)->ident.str; if (strcmp (str, "return") == 0) fatal_at (token, "return statement not allowed in C expression"); - id_base *idb = get_operator (str); - user_id *p; - if (idb && (p = dyn_cast (idb)) && p->is_oper_list) - record_operlist (token->src_loc, p); + /* Mark user operators corresponding to 'str' as used. */ + get_operator (str); } /* Record the token. */ diff --git a/gcc/match.pd b/gcc/match.pd index 8e41c973dc2..18a523fe9a8 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4462,15 +4462,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* (c ? a : b) op (c ? d : e) --> c ? (a op d) : (b op e) */ (simplify (op (vec_cond:s @0 @1 @2) (vec_cond:s @0 @3 @4)) - (vec_cond @0 (op! @1 @3) (op! 
@2 @4))) + (if (TREE_CODE_CLASS (op) != tcc_comparison + || types_match (type, TREE_TYPE (@1)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + (vec_cond @0 (op! @1 @3) (op! @2 @4 /* (c ? a : b) op d --> c ? (a op d) : (b op d) */ (simplify (op (vec_cond:s @0 @1 @2) @3) - (vec_cond @0 (op! @1 @3) (op! @2 @3))) + (if (TREE_CODE_CLASS (op) != tcc_comparison + || types_match (type, TREE_TYPE (@1)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + (vec_cond @0 (op! @1 @3) (op! @2 @3 (simplify (op @3 (vec_cond:s @0 @1 @2)) - (vec_cond @0 (op! @3 @1) (op! @3 @2 + (if (TREE_CODE_CLASS (op) != tcc_comparison + || types_match (type, TREE_TYPE (@1)) + || expand_vec_cond_expr_p (type, TREE_TYPE (@0), ERROR_MARK)) + (vec_cond @0 (op! @3 @1) (op! @3 @2) #if GIMPLE (match (nop_atomic_bit_test_and_p @0 @1 @4) diff --git a/gcc/testsuite/gcc.dg/torture/pr114070.c b/gcc/testsuite/gcc.dg/torture/pr114070.c new file mode 100644 index 000..cf46ec45a04 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr114070.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-vect-cost-model" } */ + +int unresolved(unsigned dirmask, unsigned mask, int *unresolved_n) +{ + for (int i = 0; i < 1024; i++) { +mask |= 1; +if (!unresolved_n[i] || unresolved_n[i] & 7) + dirmask |= 1; + } + return (dirmask == mask); +}
[gcc r14-9592] libgcc: Fix up bitint division [PR114397]
https://gcc.gnu.org/g:59b6cece54f33ac4994834d01e18269856576556 commit r14-9592-g59b6cece54f33ac4994834d01e18269856576556 Author: Jakub Jelinek Date: Thu Mar 21 13:07:50 2024 +0100 libgcc: Fix up bitint division [PR114397] The Knuth's division algorithm relies on the number of dividend limbs to be greater or equal to number of divisor limbs, which is why I've added a special case for un < vn at the start of __divmodbitint4. Unfortunately, my assumption that it then implies abs(v) > abs(u) and so quotient must be 0 and remainder same as dividend is incorrect. This is because this check is done before negation of the operands. While bitint_reduce_prec reduces precision from clearly useless limbs, the problematic case is when the dividend is unsigned or non-negative and divisor is negative. We can have limbs (from MS to LS): dividend: 0 M ?... divisor:-1 -N ?... where M has most significant bit set and M >= N (if M == N then also the following limbs matter) and the most significant limbs can be even partial. In this case, the quotient should be -1 rather than 0. bitint_reduce_prec will reduce the precision of the dividend so that M is the most significant limb, but can't reduce precision of the divisor to more than having the -1 as most significant limb, because -N doesn't have the most significant bit set. The following patch fixes it by detecting this problematic case in the un < vn handling, and instead of assuming q is 0 and r is u will decrease vn by 1 because it knows the later code will negate the divisor and it can be then expressed after negation in one fewer limbs. 2024-03-21 Jakub Jelinek PR libgcc/114397 * libgcc2.c (__divmodbitint4): Don't assume un < vn always means abs(v) > abs(u), check for a special case of un + 1 == vn where u is non-negative and v negative and after v's negation vn could be reduced by 1. * gcc.dg/torture/bitint-65.c: New test. 
Diff: --- gcc/testsuite/gcc.dg/torture/bitint-65.c | 44 libgcc/libgcc2.c | 89 2 files changed, 100 insertions(+), 33 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/bitint-65.c b/gcc/testsuite/gcc.dg/torture/bitint-65.c new file mode 100644 index 000..b7724d05382 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/bitint-65.c @@ -0,0 +1,44 @@ +/* PR libgcc/114397 */ +/* { dg-do run { target bitint } } */ +/* { dg-options "-std=c23" } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */ + +#if __BITINT_MAXWIDTH__ >= 129 +int +foo (unsigned _BitInt (128) a, _BitInt (129) b) +{ + return a / b; +} +#endif + +#if __BITINT_MAXWIDTH__ >= 192 +int +bar (unsigned _BitInt (128) a, _BitInt (192) b) +{ + return a / b; +} +#endif + +int +main () +{ +#if __BITINT_MAXWIDTH__ >= 129 + if (foo (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029911wb) != -1 + || foo (336225022742818342628768636932743029912uwb, + -336225022742818342628768636932743029911wb) != -1 + || foo (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029912wb) != 0) +__builtin_abort (); +#endif +#if __BITINT_MAXWIDTH__ >= 192 + if (bar (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029911wb) != -1 + || bar (336225022742818342628768636932743029912uwb, + -336225022742818342628768636932743029911wb) != -1 + || bar (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029912wb) != 0) +__builtin_abort (); +#endif +} diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c index dc856740a69..71c73d6b846 100644 --- a/libgcc/libgcc2.c +++ b/libgcc/libgcc2.c @@ -1707,44 +1707,67 @@ __divmodbitint4 (UBILtype *q, SItype qprec, USItype vp = avprec % W_TYPE_SIZE; if (__builtin_expect (un < vn, 0)) { - /* If abs(v) > abs(u), then q is 0 and r is u. 
*/ - if (q) - __builtin_memset (q, 0, qn * sizeof (UWtype)); - if (r == NULL) - return; -#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ - r += rn - 1; - u += un - 1; -#endif - if (up) - --un; - if (rn < un) - un = rn; - for (rn -= un; un; --un) + /* If abs(v) > abs(u), then q is 0 and r is u. +Unfortunately un < vn doesn't always mean abs(v) > abs(u). +If uprec > 0 and vprec < 0 and vn == un + 1, if the +top limb of v is all ones and the second most significant +limb has most significant bit clear, then just decrease +vn/avprec/vp and continue, after negation both numbers +will have the same number of limb
[gcc r14-9593] amdgcn: Clean up device memory in gcn-run
https://gcc.gnu.org/g:c3fb8a4d150586459a9fa177cb2aeeac5e4c0464 commit r14-9593-gc3fb8a4d150586459a9fa177cb2aeeac5e4c0464 Author: Andrew Stubbs Date: Wed Mar 20 12:49:24 2024 + amdgcn: Clean up device memory in gcn-run gcc/ChangeLog: * config/gcn/gcn-run.cc (main): Add an hsa_memory_free call for each device_malloc call. Diff: --- gcc/config/gcn/gcn-run.cc | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/config/gcn/gcn-run.cc b/gcc/config/gcn/gcn-run.cc index d45ff3e6c2b..2f3ed2d41d2 100644 --- a/gcc/config/gcn/gcn-run.cc +++ b/gcc/config/gcn/gcn-run.cc @@ -755,7 +755,13 @@ main (int argc, char *argv[]) /* Clean shut down. */ XHSA (hsa_fns.hsa_memory_free_fn (kernargs), - "Clean up device memory"); + "Clean up device kernargs memory"); + XHSA (hsa_fns.hsa_memory_free_fn (args), + "Clean up device args memory"); + XHSA (hsa_fns.hsa_memory_free_fn (heap), + "Clean up device heap memory"); + XHSA (hsa_fns.hsa_memory_free_fn (stack), + "Clean up device stack memory"); XHSA (hsa_fns.hsa_executable_destroy_fn (executable), "Clean up GCN executable"); XHSA (hsa_fns.hsa_queue_destroy_fn (queue),
[gcc r14-9594] amdgcn: Ensure gfx11 is running in cumode
https://gcc.gnu.org/g:69dc2dc7e0e853856b84b1bcc89d0241d8a570aa commit r14-9594-g69dc2dc7e0e853856b84b1bcc89d0241d8a570aa Author: Andrew Stubbs Date: Mon Mar 4 15:48:47 2024 + amdgcn: Ensure gfx11 is running in cumode CUmode "on" is the setting for compatibility with GCN and CDNA devices. gcc/ChangeLog: * config/gcn/gcn-hsa.h (ASM_SPEC): Pass -mattr=+cumode. Diff: --- gcc/config/gcn/gcn-hsa.h | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 9cf181f52a4..c75256dbac3 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -107,6 +107,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); "%{" NO_XNACK XNACKOPT "} " \ "%{" NO_SRAM_ECC SRAMOPT "} " \ "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \ + "%{march=gfx1030|march=gfx1100:-mattr=+cumode} " \ "-filetype=obj" #define LINK_SPEC "--pie --export-dynamic" #define LIB_SPEC "-lc"
[gcc r14-9595] amdgcn: Comment correction
https://gcc.gnu.org/g:a2fe34e0b993d5fb879d75ddb42b24b45c4b7242 commit r14-9595-ga2fe34e0b993d5fb879d75ddb42b24b45c4b7242 Author: Andrew Stubbs Date: Mon Mar 4 15:52:00 2024 + amdgcn: Comment correction The location of the marker was changed, but the comment wasn't updated. Fixed now. gcc/ChangeLog: * config/gcn/gcn.cc (gcn_expand_builtin_1): Comment correction. Diff: --- gcc/config/gcn/gcn.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index bc076d1120d..fca001811e5 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -4932,8 +4932,8 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , } case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P: { - /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate - whether it was the first call. */ + /* Stash a marker in the unused upper 16 bits of QUEUE_PTR_ARG to + indicate whether it was the first call. */ rtx result = gen_reg_rtx (BImode); emit_move_insn (result, const0_rtx); if (cfun->machine->args.reg[QUEUE_PTR_ARG] >= 0)
[gcc r13-8483] middle-end/113622 - allow .VEC_SET and .VEC_EXTRACT for global hard regs
https://gcc.gnu.org/g:d4c0800aab864bb95260e12342d18695c6ebbec8 commit r13-8483-gd4c0800aab864bb95260e12342d18695c6ebbec8 Author: Richard Biener Date: Mon Jan 29 09:47:31 2024 +0100 middle-end/113622 - allow .VEC_SET and .VEC_EXTRACT for global hard regs The following expands .VEC_SET and .VEC_EXTRACT instruction selection to global hard registers, not only automatic variables (possibly) promoted to registers. This can avoid some ICEs later and create better code. PR middle-end/113622 * gimple-isel.cc (gimple_expand_vec_set_extract_expr): Also allow DECL_HARD_REGISTER variables. * gcc.target/i386/pr113622-1.c: New testcase. (cherry picked from commit 96bc048d78f804bac0fa7b2ca3b6dd3a04c68217) Diff: --- gcc/gimple-isel.cc | 3 ++- gcc/testsuite/gcc.target/i386/pr113622-1.c | 12 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index ef688ddb57f..760029d27c9 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -77,7 +77,8 @@ gimple_expand_vec_set_expr (struct function *fun, gimple_stmt_iterator *gsi) tree pos = TREE_OPERAND (lhs, 1); tree view_op0 = TREE_OPERAND (op0, 0); machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); - if (auto_var_in_fn_p (view_op0, fun->decl) + if ((auto_var_in_fn_p (view_op0, fun->decl) + || (VAR_P (view_op0) && DECL_HARD_REGISTER (view_op0))) && !TREE_ADDRESSABLE (view_op0) && can_vec_set_var_idx_p (outermode)) { location_t loc = gimple_location (stmt); diff --git a/gcc/testsuite/gcc.target/i386/pr113622-1.c b/gcc/testsuite/gcc.target/i386/pr113622-1.c new file mode 100644 index 000..d3a51cd81dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113622-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -w" } */ + +typedef float __attribute__ ((vector_size (64))) vec; +register vec a asm("zmm5"), b asm("zmm6"), c asm("zmm7"); + +void +test (void) +{ + for (int i = 0; i < 8; i++) +c[i] = a[i] < b[i] ? 0.1 : 0.2; +}
[gcc r13-8484] tree-optimization/113670 - gather/scatter to/from hard registers
https://gcc.gnu.org/g:ac664905b837095b15099e44e83471672eee7aa9 commit r13-8484-gac664905b837095b15099e44e83471672eee7aa9 Author: Richard Biener Date: Wed Jan 31 09:09:50 2024 +0100 tree-optimization/113670 - gather/scatter to/from hard registers The following makes sure we're not taking the address of hard registers when vectorizing apparent gathers or scatters to/from them. PR tree-optimization/113670 * tree-vect-data-refs.cc (vect_check_gather_scatter): Make sure we can take the address of the reference base. * gcc.target/i386/pr113670.c: New testcase. (cherry picked from commit 924137b9012cee5603482242de08fbf0b2030f6a) Diff: --- gcc/testsuite/gcc.target/i386/pr113670.c | 16 gcc/tree-vect-data-refs.cc | 5 + 2 files changed, 21 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/pr113670.c b/gcc/testsuite/gcc.target/i386/pr113670.c new file mode 100644 index 000..8b9d3744fe2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113670.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-msse2 -O2 -fno-vect-cost-model" } */ + +typedef float __attribute__ ((vector_size (16))) vec; +typedef int __attribute__ ((vector_size (16))) ivec; +ivec x; + +void +test (void) +{ + register vec a asm("xmm3"), b asm("xmm4"); + register ivec c asm("xmm5"); + for (int i = 0; i < 4; i++) +c[i] = a[i] < b[i] ? -1 : 1; + x = c; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index babd83dd830..4fefd046207 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4029,6 +4029,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, if (!multiple_p (pbitpos, BITS_PER_UNIT)) return false; + /* We need to be able to form an address to the base which for example + isn't possible for hard registers. */ + if (may_be_nonaddressable_p (base)) +return false; + poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT); if (TREE_CODE (base) == MEM_REF)
[gcc r13-8486] tree-optimization/114231 - use patterns for BB SLP discovery root stmts
https://gcc.gnu.org/g:04fffbaa87997ac893a9aa68b674c938ba3ecddb commit r13-8486-g04fffbaa87997ac893a9aa68b674c938ba3ecddb Author: Richard Biener Date: Tue Mar 5 10:55:56 2024 +0100 tree-optimization/114231 - use patterns for BB SLP discovery root stmts The following makes sure to use recognized patterns when vectorizing roots during BB SLP discovery. We need to apply those late since during root discovery we've not yet done pattern recognition. All parts of the vectorizer assume patterns get used, for the testcase we mix this up when doing live lane computation. PR tree-optimization/114231 * tree-vect-slp.cc (vect_analyze_slp): Lookup patterns when processing a BB SLP root. * gcc.dg/vect/pr114231.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/pr114231.c | 12 gcc/tree-vect-slp.cc | 4 2 files changed, 16 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr114231.c b/gcc/testsuite/gcc.dg/vect/pr114231.c new file mode 100644 index 000..5e3a8103918 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr114231.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ + +void f(long*); +int ff[2]; +void f2(long, long, unsigned long); +void k(unsigned long x, unsigned long y) +{ + long t = x >> ff[0]; + long t1 = ff[1]; + unsigned long t2 = y >> ff[0]; + f2(t1, t+t2, t2); +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 6120620c9a8..e4d3cb89e6f 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3478,6 +3478,10 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) for (unsigned i = 0; i < bb_vinfo->roots.length (); ++i) { vect_location = bb_vinfo->roots[i].roots[0]->stmt; + /* Apply patterns. */ + for (unsigned j = 0; j < bb_vinfo->roots[i].stmts.length (); ++j) + bb_vinfo->roots[i].stmts[j] + = vect_stmt_to_vectorize (bb_vinfo->roots[i].stmts[j]); if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind, bb_vinfo->roots[i].stmts, bb_vinfo->roots[i].roots,
[gcc r13-8485] tree-optimization/112793 - SLP of constant/external code-generated twice
https://gcc.gnu.org/g:42d5985e9884299c8d837ad1588fb47b211b4baf commit r13-8485-g42d5985e9884299c8d837ad1588fb47b211b4baf Author: Richard Biener Date: Wed Dec 13 14:23:31 2023 +0100 tree-optimization/112793 - SLP of constant/external code-generated twice The following makes the attempt at code-generating a constant/external SLP node twice well-formed as that can happen when partitioning BB vectorization attempts where we keep constants/externals unpartitioned. PR tree-optimization/112793 * tree-vect-slp.cc (vect_schedule_slp_node): Already code-generated constant/external nodes are OK. * g++.dg/vect/pr112793.cc: New testcase. (cherry picked from commit d782ec8362eadc3169286eb1e39c631effd02323) Diff: --- gcc/testsuite/g++.dg/vect/pr112793.cc | 32 gcc/tree-vect-slp.cc | 16 +--- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/gcc/testsuite/g++.dg/vect/pr112793.cc b/gcc/testsuite/g++.dg/vect/pr112793.cc new file mode 100644 index 000..258d7c1b111 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr112793.cc @@ -0,0 +1,32 @@ +// { dg-do compile } +// { dg-require-effective-target c++11 } +// { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } + +typedef double T; +T c, s; +T a[16]; +struct Matrix4 { + Matrix4(){} + Matrix4(T e, T f, T i, T j) { +r[1] = r[4] = e; +r[5] = f; +r[8] = i; +r[9] = j; + } + Matrix4 operator*(Matrix4 a) { +return Matrix4( + r[0] * a.r[4] + r[4] + r[15] + r[6], + r[1] * a.r[4] + 1 + 2 + 3, r[0] * r[8] + 1 + 2 + 3, + r[1] * r[8] + r[1] + r[14] + r[2] * r[3]); + } + T r[16] = {}; +}; +Matrix4 t1, t2; +Matrix4 tt; +Matrix4 getRotAltAzToEquatorial() +{ + t2.r[4] = 0; + t1.r[1] = -s; + t1.r[8] = 0; + return t1 * t2; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index a6c03fe6442..6120620c9a8 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8777,12 +8777,6 @@ vect_schedule_slp_node (vec_info *vinfo, int i; slp_tree child; - /* For existing vectors there's nothing to do. 
*/ - if (SLP_TREE_VEC_DEFS (node).exists ()) -return; - - gcc_assert (SLP_TREE_VEC_STMTS (node).is_empty ()); - /* Vectorize externals and constants. */ if (SLP_TREE_DEF_TYPE (node) == vect_constant_def || SLP_TREE_DEF_TYPE (node) == vect_external_def) @@ -8793,10 +8787,18 @@ vect_schedule_slp_node (vec_info *vinfo, if (!SLP_TREE_VECTYPE (node)) return; - vect_create_constant_vectors (vinfo, node); + /* There are two reasons vector defs might already exist. The first +is that we are vectorizing an existing vector def. The second is +when performing BB vectorization shared constant/external nodes +are not split apart during partitioning so during the code-gen +DFS walk we can end up visiting them twice. */ + if (! SLP_TREE_VEC_DEFS (node).exists ()) + vect_create_constant_vectors (vinfo, node); return; } + gcc_assert (SLP_TREE_VEC_DEFS (node).is_empty ()); + stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node); gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
[gcc r14-9596] c++: explicit inst of template method not generated [PR110323]
https://gcc.gnu.org/g:081f8937cb82da311c224da04b0c6cbd57a8fb5d commit r14-9596-g081f8937cb82da311c224da04b0c6cbd57a8fb5d Author: Marek Polacek Date: Thu Mar 7 20:41:23 2024 -0500 c++: explicit inst of template method not generated [PR110323] Consider constexpr int VAL = 1; struct foo { template void bar(typename std::conditional::type arg) { } }; template void foo::bar<1>(int arg); where we since r11-291 fail to emit the code for the explicit instantiation. That's because cp_walk_subtrees/TYPENAME_TYPE now walks TYPE_CONTEXT ('conditional' here) as well, and in a template finds the B==VAL template argument. VAL is constexpr, which implies const, which in the global scope implies static. constrain_visibility_for_template then makes "struct conditional<(B == VAL), int, float>" non-TREE_PUBLIC. Then symtab_node::needed_p checks TREE_PUBLIC, sees it's 0, and we don't emit any code. I thought the fix would be some ODR-esque check to not consider constexpr variables/fns that are used just for their value. But it turned out to be tricky. For instance, we can't skip determine_visibility in a template; we can't even skip it for value-dep expressions. For example, no-linkage-expr1.C has using P = struct {}*; template void f(int(*)[((P)0, N)]) {} where ((P)0, N) is value-dep, but N is not relevant here: we have to ferret out the anonymous type. When instantiating, it's already gone. This patch uses decl_constant_var_p. This is to implement (an approximation) [basic.def.odr]#14.5.1 and [basic.def.odr]#5.2. PR c++/110323 gcc/cp/ChangeLog: * decl2.cc (min_vis_expr_r) : Do nothing for decl_constant_var_p VAR_DECLs. gcc/testsuite/ChangeLog: * g++.dg/template/explicit-instantiation6.C: New test. * g++.dg/template/explicit-instantiation7.C: New test. 
Diff: --- gcc/cp/decl2.cc| 7 +++- .../g++.dg/template/explicit-instantiation6.C | 43 ++ .../g++.dg/template/explicit-instantiation7.C | 22 +++ 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index 2562d8aeff6..1339f210dde 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -2718,7 +2718,12 @@ min_vis_expr_r (tree *tp, int */*walk_subtrees*/, void *data) /* Fall through. */ case VAR_DECL: case FUNCTION_DECL: - if (! TREE_PUBLIC (t)) + if (decl_constant_var_p (t)) + /* The ODR allows definitions in different TUs to refer to distinct + constant variables with internal or no linkage, so such a reference + shouldn't affect visibility (PR110323). FIXME but only if the + lvalue-rvalue conversion is applied. */; + else if (! TREE_PUBLIC (t)) tpvis = VISIBILITY_ANON; else tpvis = DECL_VISIBILITY (t); diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation6.C b/gcc/testsuite/g++.dg/template/explicit-instantiation6.C new file mode 100644 index 000..8b77c9deb20 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/explicit-instantiation6.C @@ -0,0 +1,43 @@ +// PR c++/110323 +// { dg-do compile { target c++14 } } + +template +struct conditional { using type = T; }; + +template +struct conditional { using type = F; }; + +constexpr int VAL = 1; + +static constexpr int getval () { return 1; } + +template +constexpr int TVAL = 1; + +static struct S { + constexpr operator bool() { return true; } +} s; + +struct foo { +template +void bar(typename conditional::type arg) { } + +template +void qux(typename conditional, int, float>::type arg) { } + +template +void sox(typename conditional::type arg) { } + +template +void nim(typename conditional::type arg) { } +}; + +template void foo::bar<1>(int arg); +template void foo::qux<1>(int arg); +template void foo::sox<1>(int arg); +template void foo::nim<1>(int arg); + +// { dg-final { scan-assembler "_ZN3foo3barILi1EEEvN11conditionalIXeqT_L_ZL3VALEEifE4typeE" } } +// { dg-final { 
scan-assembler "_ZN3foo3quxILi1EEEvN11conditionalIXeqT_L_Z4TVALIiEEEifE4typeE" } } +// { dg-final { scan-assembler "_ZN3foo3soxILi1EEEvN11conditionalIXeqT_nxL_ZL3VALEEifE4typeE" } } +// { dg-final { scan-assembler "_ZN3foo3nimILi1EEEvN11conditionalIXneT_szL_ZL3VALEEifE4typeE" } } diff --git a/gcc/testsuite/g++.dg/template/explicit-instantiation7.C b/gcc/testsuite/g++.dg/template/explicit-instantiation7.C new file mode 100644 index 000..9a870e808fa --- /dev/null +++ b/gcc/testsuite/g++.dg/template/explicit-instantiation7.C @@ -0,0 +1,22 @@ +// PR c++/110323 +// { dg-do compile { target c++11 } } + +using P = struct { }*; +using N = struct A { }*; + +template +struct conditional { using type = T; }; + +struct foo { +te
[gcc r14-9597] Fortran: improve array component description in runtime error message [PR30802]
https://gcc.gnu.org/g:509352069d6f166d396f4b4a86e71ea521f2ca78 commit r14-9597-g509352069d6f166d396f4b4a86e71ea521f2ca78 Author: Harald Anlauf Date: Wed Mar 20 20:59:24 2024 +0100 Fortran: improve array component description in runtime error message [PR30802] Runtime error messages for array bounds violation shall use the following scheme for a coherent, abridged description of arrays or array components of derived types: (1) If x is an ordinary array variable, use "x" (2) if z is a DT scalar and x an array component at level 1, use "z%x" (3) if z is a DT scalar and x an array component at level > 1, or if z is a DT array and x an array (at any level), use "z...%x" Use a new helper function abridged_ref_name for construction of that name. gcc/fortran/ChangeLog: PR fortran/30802 * trans-array.cc (abridged_ref_name): New helper function. (trans_array_bound_check): Use it. (array_bound_check_elemental): Likewise. (gfc_conv_array_ref): Likewise. gcc/testsuite/ChangeLog: PR fortran/30802 * gfortran.dg/bounds_check_17.f90: Adjust pattern. * gfortran.dg/bounds_check_fail_8.f90: New test. Diff: --- gcc/fortran/trans-array.cc| 132 ++ gcc/testsuite/gfortran.dg/bounds_check_17.f90 | 2 +- gcc/testsuite/gfortran.dg/bounds_check_fail_8.f90 | 56 + 3 files changed, 142 insertions(+), 48 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 0a453828bad..30b84762346 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -3485,6 +3485,78 @@ gfc_conv_array_ubound (tree descriptor, int dim) } +/* Generate abridged name of a part-ref for use in bounds-check message. 
+ Cases: + (1) for an ordinary array variable x return "x" + (2) for z a DT scalar and array component x (at level 1) return "z%%x" + (3) for z a DT scalar and array component x (at level > 1) or + for z a DT array and array x (at any number of levels): "z...%%x" + */ + +static char * +abridged_ref_name (gfc_expr * expr, gfc_array_ref * ar) +{ + gfc_ref *ref; + gfc_symbol *sym; + char *ref_name = NULL; + const char *comp_name = NULL; + int len_sym, last_len = 0, level = 0; + bool sym_is_array; + + gcc_assert (expr->expr_type == EXPR_VARIABLE && expr->ref != NULL); + + sym = expr->symtree->n.sym; + sym_is_array = (sym->ts.type != BT_CLASS + ? sym->as != NULL + : IS_CLASS_ARRAY (sym)); + len_sym = strlen (sym->name); + + /* Scan ref chain to get name of the array component (when ar != NULL) or + array section, determine depth and remember its component name. */ + for (ref = expr->ref; ref; ref = ref->next) +{ + if (ref->type == REF_COMPONENT + && strcmp (ref->u.c.component->name, "_data") != 0) + { + level++; + comp_name = ref->u.c.component->name; + continue; + } + + if (ref->type != REF_ARRAY) + continue; + + if (ar) + { + if (&ref->u.ar == ar) + break; + } + else if (ref->u.ar.type == AR_SECTION) + break; +} + + if (level > 0) +last_len = strlen (comp_name); + + /* Provide a buffer sufficiently large to hold "x...%%z". */ + ref_name = XNEWVEC (char, len_sym + last_len + 6); + strcpy (ref_name, sym->name); + + if (level == 1 && !sym_is_array) +{ + strcat (ref_name, "%%"); + strcat (ref_name, comp_name); +} + else if (level > 0) +{ + strcat (ref_name, "...%%"); + strcat (ref_name, comp_name); +} + + return ref_name; +} + + /* Generate code to perform an array index bound check. 
*/ static tree @@ -3496,7 +3568,9 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree index, int n, tree tmp_lo, tmp_up; tree descriptor; char *msg; + char *ref_name = NULL; const char * name = NULL; + gfc_expr *expr; if (!(gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)) return index; @@ -3509,6 +3583,12 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree index, int n, name = ss->info->expr->symtree->n.sym->name; gcc_assert (name != NULL); + /* When we have a component ref, get name of the array section. + Note that there can only be one part ref. */ + expr = ss->info->expr; + if (expr->ref && !compname) +name = ref_name = abridged_ref_name (expr, NULL); + if (VAR_P (descriptor)) name = IDENTIFIER_POINTER (DECL_NAME (descriptor)); @@ -3562,6 +3642,7 @@ trans_array_bound_check (gfc_se * se, gfc_ss *ss, tree index, int n, free (msg); } + free (ref_name); return index; } @@ -3573,36 +3654,17 @@ array_bound_check_elemental (gfc_se * se, gfc_ss * ss, gfc_expr * expr) { gfc_array_ref *ar; gfc_ref *ref; - gfc_symbol *sym; char *var_name = NULL; - size_t len; int dim; if (expr->
[gcc r14-9598] PR modula2/114418 missing import of TSIZE from system causes ICE
https://gcc.gnu.org/g:ba744d50ac0360f7992a42494db766f6548913e3 commit r14-9598-gba744d50ac0360f7992a42494db766f6548913e3 Author: Gaius Mulley Date: Thu Mar 21 18:30:23 2024 + PR modula2/114418 missing import of TSIZE from system causes ICE This patch detects whether the symbol func is NulSym before generating an error and if so just uses the token location and fixed string to generate an error message. gcc/m2/ChangeLog: PR modula2/114418 * gm2-compiler/PCSymBuild.mod (PushConstFunctionType): Check func against NulSym and issue an error. gcc/testsuite/ChangeLog: PR modula2/114418 * gm2/pim/fail/missingtsize.mod: New test. * gm2/pim/fail/missingtsize2.mod: New test. Signed-off-by: Gaius Mulley Diff: --- gcc/m2/gm2-compiler/PCSymBuild.mod | 48 +--- gcc/testsuite/gm2/pim/fail/missingtsize.mod | 8 + gcc/testsuite/gm2/pim/fail/missingtsize2.mod | 8 + 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/gcc/m2/gm2-compiler/PCSymBuild.mod b/gcc/m2/gm2-compiler/PCSymBuild.mod index e2165408781..9a6e8c06e70 100644 --- a/gcc/m2/gm2-compiler/PCSymBuild.mod +++ b/gcc/m2/gm2-compiler/PCSymBuild.mod @@ -1412,6 +1412,38 @@ BEGIN END buildConstFunction ; +(* + ErrorConstFunction - generate an error message at functok using func in the +error message providing it is not NulSym. 
+*) + +PROCEDURE ErrorConstFunction (func: CARDINAL; functok: CARDINAL) ; +BEGIN + IF func = NulSym + THEN + IF Iso + THEN + ErrorFormat0 (NewError (functok), + 'the only functions permissible in a constant expression are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, RE, SIZE, TSIZE, TRUNC, VAL and gcc builtins') + ELSE + ErrorFormat0 (NewError (functok), + 'the only functions permissible in a constant expression are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, VAL and gcc builtins') + END + ELSE + IF Iso + THEN + MetaErrorT1 (functok, + 'the only functions permissible in a constant expression are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, RE, SIZE, TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}', + func) + ELSE + MetaErrorT1 (functok, + 'the only functions permissible in a constant expression are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}', + func) + END + END +END ErrorConstFunction ; + + (* PushConstFunctionType - *) @@ -1426,7 +1458,10 @@ BEGIN PopTtok (func, functok) ; IF inDesignator THEN - IF (func#Convert) AND + IF func = NulSym + THEN + ErrorConstFunction (func, functok) + ELSIF (func#Convert) AND (IsPseudoBaseFunction(func) OR IsPseudoSystemFunctionConstExpression(func) OR (IsProcedure(func) AND IsProcedureBuiltin(func))) @@ -1442,16 +1477,7 @@ BEGIN WriteFormat0('a constant type conversion can only have one argument') END ELSE - IF Iso - THEN -MetaErrorT1 (functok, - 'the only functions permissible in a constant expression are: CAP, CHR, CMPLX, FLOAT, HIGH, IM, LENGTH, MAX, MIN, ODD, ORD, RE, SIZE, TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}', -func) - ELSE -MetaErrorT1 (functok, - 'the only functions permissible in a constant expression are: CAP, CHR, FLOAT, HIGH, MAX, MIN, ODD, ORD, SIZE, TSIZE, TRUNC, VAL and gcc builtins, but not {%1Ead}', -func) - END + ErrorConstFunction (func, functok) END END ; PushTtok (func, functok) 
diff --git a/gcc/testsuite/gm2/pim/fail/missingtsize.mod b/gcc/testsuite/gm2/pim/fail/missingtsize.mod new file mode 100644 index 000..23ec055d7be --- /dev/null +++ b/gcc/testsuite/gm2/pim/fail/missingtsize.mod @@ -0,0 +1,8 @@ +MODULE missingtsize ; + +CONST + NoOfBytes = TSIZE (CARDINAL) ; + +BEGIN + +END missingtsize. diff --git a/gcc/testsuite/gm2/pim/fail/missingtsize2.mod b/gcc/testsuite/gm2/pim/fail/missingtsize2.mod new file mode 100644 index 000..8e859445ab0 --- /dev/null +++ b/gcc/testsuite/gm2/pim/fail/missingtsize2.mod @@ -0,0 +1,8 @@ +MODULE missingtsize2 ; + +CONST + NoOfBytes = TSIZE (CARDINAL) * 4 ; + +BEGIN + +END missingtsize2. \ No newline at end of file
[gcc r14-9599] PR modula2/113836 gm2 does not dump gimple or quadruples to file
https://gcc.gnu.org/g:48d49200510198cafcab55601cd8e5f8eb541f01 commit r14-9599-g48d49200510198cafcab55601cd8e5f8eb541f01 Author: Gaius Mulley Date: Thu Mar 21 19:38:03 2024 + PR modula2/113836 gm2 does not dump gimple or quadruples to file This patch provides the localized modula2 changes to gcc/m2 which facilitate the dumping of gimple and quadruples to file. PR modula2/113836 will be fully complete after a subsequent patch adding changes to lang.opt and documentation. The lang.opt patch requires all language bootstrap regression testing whereas this patch is isolated to gcc/m2 and only the m2 language. gcc/m2/ChangeLog: PR modula2/113836 * Make-lang.in (GM2_C_OBJS): Add m2/gm2-gcc/m2pp.o. (m2/m2pp.o): Remove rule. (GM2-COMP-BOOT-DEFS): Add M2LangDump.def. (GM2-COMP-BOOT-MODS): Add M2LangDump.mod. (GM2-GCC-DEFS): Add M2LangDump.def. (GM2-GCC-MODS): Add M2LangDump.mod. * gm2-compiler/M2CaseList.mod (WriteCase): Rewrite. * gm2-compiler/M2Code.mod (DoModuleDeclare): Call DumpFilteredResolver depending upon DumpLangDecl. (DoCodeBlock): Call CreateDumpGimple depending upon DumpLangGimple. (Code): Replace DisplayQuadList blocks with DumpQuadruples. (DisplayQuadsInScope): Remove. (DisplayQuadNumbers): Remove. (CodeBlock): Rewrite. * gm2-compiler/M2GCCDeclare.def (IncludeDumpSymbol): New procedure. (DumpFilteredResolver): New procedure. (DumpFilteredDefinitive): New procedure. * gm2-compiler/M2GCCDeclare.mod (IncludeDumpSymbol): New procedure. (DumpFilteredResolver): New procedure. (DumpFilteredDefinitive): New procedure. (doInclude): Rewrite to use GetDumpFile. (WatchIncludeList): Remove fixed debugging value. (doExclude): Rewrite to use GetDumpFile. (DeclareTypesConstantsProceduresInRange): Remove fixed debugging values. (PreAddModGcc): Rename parameter t as tree. (IncludeGetNth): Rewrite to use GetDumpFile. (IncludeType): Ditto. (IncludeSubscript): Ditto. (PrintLocalSymbol): Ditto. (PrintLocalSymbols): Ditto. (IncludeGetVarient): Ditto. (PrintDeclared): Ditto. 
(PrintAlignment): Ditto. (PrintDecl): Ditto. (PrintScope): Ditto. (PrintProcedure): Ditto. (PrintSym): Ditto. (PrintSymbol): Ditto. (PrintTerse): Ditto. * gm2-compiler/M2Options.def (GetDumpLangDeclFilename): New procedure function. (SetDumpLangDeclFilename): New procedure. (GetDumpLangQuadFilename): New procedure function. (SetDumpLangQuadFilename): New procedure. (GetDumpLangGimpleFilename): New procedure function. (SetDumpLangGimpleFilename): New procedure. (SetM2DumpFilter): New procedure. (GetM2DumpFilter): New procedure function. (GetDumpLangGimple): New procedure function. * gm2-compiler/M2Options.mod (GetDumpLangDeclFilename): New procedure function. (SetDumpLangDeclFilename): New procedure. (GetDumpLangQuadFilename): New procedure function. (SetDumpLangQuadFilename): New procedure. (GetDumpLangGimpleFilename): New procedure function. (SetDumpLangGimpleFilename): New procedure. (SetM2DumpFilter): New procedure. (GetM2DumpFilter): New procedure function. (GetDumpLangGimple): New procedure function. * gm2-compiler/M2Quads.def (DumpQuadruples): New procedure. * gm2-compiler/M2Quads.mod (DumpUntil): New procedure. (GetCtorInit): New procedure function. (GetCtorFini): New procedure function. (DumpQuadrupleFilter): New procedure function. (DumpQuadrupleAll): New procedure. (DisplayQuadList): Remove procedure. (DumpQuadruples): New procedure. (DisplayQuadRange): Rewrite. (DisplayQuad): Ditto. (DisplayProcedureAttributes): Ditto. (WriteOperator): Ditto. (WriteMode): Ditto. * gm2-compiler/M2Scope.mod (ForeachScopeBlockDo2): Replace DisplayQuadruples with TraceQuadruples. (ForeachScopeBlockDo3): Replace DisplayQuadruples with TraceQuadruples. * gm2-compiler/SymbolConversion.def (Gcc2Mod): New procedure function. * gm2-compiler/SymbolConversion.mod: New procedure function. * gm2-gcc/m2misc.cc (m2misc_DebugTree): New function. (m2misc_DebugTreeChain): New function. * gm2-gcc/m2options.h (M2Options_GetDumpLangDeclF
[gcc r14-9600] analyzer: fix ignored constraints involving casts [PR113619]
https://gcc.gnu.org/g:7a5a4a4467b2e18ff4fe24f565e120280d3e6ba7 commit r14-9600-g7a5a4a4467b2e18ff4fe24f565e120280d3e6ba7 Author: David Malcolm Date: Thu Mar 21 17:48:38 2024 -0400 analyzer: fix ignored constraints involving casts [PR113619] gcc/analyzer/ChangeLog: PR analyzer/113619 * region-model.cc (region_model::eval_condition): Fix cast-handling from r14-3632-ge7b267444045c5 so that if those give an unknown result, we continue trying the constraint manager. gcc/testsuite/ChangeLog: PR analyzer/113619 * c-c++-common/analyzer/taint-divisor-pr113619.c: New test. Signed-off-by: David Malcolm Diff: --- gcc/analyzer/region-model.cc | 24 -- .../c-c++-common/analyzer/taint-divisor-pr113619.c | 29 ++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index c3a4ec7bcfc..902b887fc07 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -4704,17 +4704,27 @@ region_model::eval_condition (const svalue *lhs, if (lhs_un_op && CONVERT_EXPR_CODE_P (lhs_un_op->get_op ()) && rhs_un_op && CONVERT_EXPR_CODE_P (rhs_un_op->get_op ()) && lhs_type == rhs_type) - return eval_condition (lhs_un_op->get_arg (), -op, -rhs_un_op->get_arg ()); - + { + tristate res = eval_condition (lhs_un_op->get_arg (), + op, + rhs_un_op->get_arg ()); + if (res.is_known ()) + return res; + } else if (lhs_un_op && CONVERT_EXPR_CODE_P (lhs_un_op->get_op ()) && lhs_type == rhs_type) - return eval_condition (lhs_un_op->get_arg (), op, rhs); - + { + tristate res = eval_condition (lhs_un_op->get_arg (), op, rhs); + if (res.is_known ()) + return res; + } else if (rhs_un_op && CONVERT_EXPR_CODE_P (rhs_un_op->get_op ()) && lhs_type == rhs_type) - return eval_condition (lhs, op, rhs_un_op->get_arg ()); + { + tristate res = eval_condition (lhs, op, rhs_un_op->get_arg ()); + if (res.is_known ()) + return res; + } } /* Otherwise, try constraints. 
diff --git a/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c b/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c new file mode 100644 index 000..15c881247ce --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/taint-divisor-pr113619.c @@ -0,0 +1,29 @@ +/* Reduced from false positive in Linux kernel's fs/ceph/ioctl.c: */ + +__extension__ typedef unsigned long long __u64; + +struct ceph_ioctl_layout +{ + __u64 stripe_unit, object_size; +}; +static long +__validate_layout(struct ceph_ioctl_layout* l) +{ + if ((l->object_size & ~(~(((1UL) << 12) - 1))) || + (l->stripe_unit & ~(~(((1UL) << 12) - 1))) || + ((unsigned)l->stripe_unit != 0 && + ((unsigned)l->object_size % (unsigned)l->stripe_unit))) /* { dg-bogus "use of attacker-controlled value 'l.stripe_unit' as divisor without checking for zero" "PR analyzer/113619" } */ +return -22; + return 0; +} + +long +__attribute__((tainted_args)) +ceph_ioctl_set_layout_policy(struct ceph_ioctl_layout l) +{ + int err; + err = __validate_layout(&l); + if (err) +return err; + return err; +}
[gcc r14-9602] PR modula2/114422 Attempting to declare a set of unknown type causes ICE
https://gcc.gnu.org/g:1542e8a44cc35e63233d3557afbf501c5ff84c55 commit r14-9602-g1542e8a44cc35e63233d3557afbf501c5ff84c55 Author: Gaius Mulley Date: Fri Mar 22 01:47:31 2024 +0000 PR modula2/114422 Attempting to declare a set of unknown type causes ICE This patch corrects an error message directive which did not escape the { character. The patch also contains test cases to stress set declaration errors. gcc/m2/ChangeLog: PR modula2/114422 * gm2-compiler/M2Quads.mod (BuildConstructor): Add escape character. gcc/testsuite/ChangeLog: PR modula2/114422 * gm2/iso/fail/badset.mod: New test. * gm2/iso/fail/badset2.mod: New test. * gm2/iso/fail/badset3.mod: New test. Signed-off-by: Gaius Mulley Diff: --- gcc/m2/gm2-compiler/M2Quads.mod| 2 +- gcc/testsuite/gm2/iso/fail/badset.mod | 7 +++ gcc/testsuite/gm2/iso/fail/badset2.mod | 9 + gcc/testsuite/gm2/iso/fail/badset3.mod | 9 + 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/gcc/m2/gm2-compiler/M2Quads.mod b/gcc/m2/gm2-compiler/M2Quads.mod index ac654e89c91..52d72f6cd62 100644 --- a/gcc/m2/gm2-compiler/M2Quads.mod +++ b/gcc/m2/gm2-compiler/M2Quads.mod @@ -12172,7 +12172,7 @@ BEGIN IF type = NulSym THEN MetaErrorT0 (tokcbrpos, - '{%E}constructor requires a type before the opening {') + '{%E}constructor requires a type before the opening %{') ELSE ChangeToConstructor (tok, type) ; PutConstructorFrom (constValue, type) ; diff --git a/gcc/testsuite/gm2/iso/fail/badset.mod b/gcc/testsuite/gm2/iso/fail/badset.mod new file mode 100644 index 000..8bfc49f948e --- /dev/null +++ b/gcc/testsuite/gm2/iso/fail/badset.mod @@ -0,0 +1,7 @@ +MODULE badset ; + +VAR + set: set ; +BEGIN + +END badset. diff --git a/gcc/testsuite/gm2/iso/fail/badset2.mod b/gcc/testsuite/gm2/iso/fail/badset2.mod new file mode 100644 index 000..d883d1ffe75 --- /dev/null +++ b/gcc/testsuite/gm2/iso/fail/badset2.mod @@ -0,0 +1,9 @@ +MODULE badset2 ; + +TYPE + userset = SET OF CHAR ; +VAR + set: userset ; +BEGIN + set := set {} +END badset2. 
diff --git a/gcc/testsuite/gm2/iso/fail/badset3.mod b/gcc/testsuite/gm2/iso/fail/badset3.mod new file mode 100644 index 000..29131ff4c5a --- /dev/null +++ b/gcc/testsuite/gm2/iso/fail/badset3.mod @@ -0,0 +1,9 @@ +MODULE badset3 ; + +TYPE + userset = SET OF CHAR ; +VAR + set: userset ; +BEGIN + set := unknown {} +END badset3.
[gcc r14-9603] Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.
https://gcc.gnu.org/g:9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2 commit r14-9603-g9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2 Author: liuhongt Date: Fri Mar 22 10:09:43 2024 +0800 Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute. Also fixed a typo in the testcase. gcc/testsuite/ChangeLog: PR tree-optimization/114396 * gcc.target/i386/pr114396.c: Move to... * gcc.c-torture/execute/pr114396.c: ...here. Diff: --- gcc/testsuite/{gcc.target/i386 => gcc.c-torture/execute}/pr114396.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c b/gcc/testsuite/gcc.c-torture/execute/pr114396.c similarity index 92% rename from gcc/testsuite/gcc.target/i386/pr114396.c rename to gcc/testsuite/gcc.c-torture/execute/pr114396.c index 4c4015f871f..baf90eafabf 100644 --- a/gcc/testsuite/gcc.target/i386/pr114396.c +++ b/gcc/testsuite/gcc.c-torture/execute/pr114396.c @@ -1,5 +1,5 @@ -/* { dg-do run } */ -/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */ +/* PR tree-optimization/114396 */ +/* { dg-additional-options "-fwrapv -fno-vect-cost-model" } */ short a = 0xF; short b[16]; @@ -88,7 +88,7 @@ int main() { exp = foo1 (a); res = foo1_o3 (a); - if (uexp != ures) + if (exp != res) __builtin_abort (); uexp = foou (a);
[gcc r13-8488] Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute.
https://gcc.gnu.org/g:e6a3d1f5bcfd954b614155d96c97bde8ac230e2e commit r13-8488-ge6a3d1f5bcfd954b614155d96c97bde8ac230e2e Author: liuhongt Date: Fri Mar 22 10:09:43 2024 +0800 Move pr114396.c from gcc.target/i386 to gcc.c-torture/execute. Also fixed a typo in the testcase. gcc/testsuite/ChangeLog: PR tree-optimization/114396 * gcc.target/i386/pr114396.c: Move to... * gcc.c-torture/execute/pr114396.c: ...here. (cherry picked from commit 9a6c7aa1b011b77fcd9b19f7b8d7ff0fc823cdb2) Diff: --- gcc/testsuite/{gcc.target/i386 => gcc.c-torture/execute}/pr114396.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr114396.c b/gcc/testsuite/gcc.c-torture/execute/pr114396.c similarity index 92% rename from gcc/testsuite/gcc.target/i386/pr114396.c rename to gcc/testsuite/gcc.c-torture/execute/pr114396.c index 4c4015f871f..baf90eafabf 100644 --- a/gcc/testsuite/gcc.target/i386/pr114396.c +++ b/gcc/testsuite/gcc.c-torture/execute/pr114396.c @@ -1,5 +1,5 @@ -/* { dg-do run } */ -/* { dg-options "-O1 -fwrapv -fno-vect-cost-model" } */ +/* PR tree-optimization/114396 */ +/* { dg-additional-options "-fwrapv -fno-vect-cost-model" } */ short a = 0xF; short b[16]; @@ -88,7 +88,7 @@ int main() { exp = foo1 (a); res = foo1_o3 (a); - if (uexp != ures) + if (exp != res) __builtin_abort (); uexp = foou (a);
[gcc r14-9604] RISC-V: Bugfix ICE for __attribute__((target("arch=+v"))
https://gcc.gnu.org/g:d3c24e9e55a7cf18df313a8b32b6de4b3ba81013 commit r14-9604-gd3c24e9e55a7cf18df313a8b32b6de4b3ba81013 Author: Pan Li Date: Mon Mar 18 11:21:29 2024 +0800 RISC-V: Bugfix ICE for __attribute__((target("arch=+v")) This patch would like to fix one ICE for __attribute__((target("arch=+v")) and likewise extension(s). Given we have sample code as below: void __attribute__((target("arch=+v"))) test_2 (int *a, int *b, int *out, unsigned count) { unsigned i; for (i = 0; i < count; i++) out[i] = a[i] + b[i]; } It will have ICE when build with -march=rv64gc -O3. test.c: In function ‘test_2’: test.c:4:1: internal compiler error: Floating point exception 4 | { | ^ 0x1a5891b crash_signal .../__RISC-V_BUILD__/../gcc/toplev.cc:319 0x7f0a7884251f ??? ./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0 0x1f51ba4 riscv_hard_regno_nregs .../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:8143 0x1967bb9 init_reg_modes_target() .../__RISC-V_BUILD__/../gcc/reginfo.cc:471 0x13fc029 init_emit_regs() .../__RISC-V_BUILD__/../gcc/emit-rtl.cc:6237 0x1a5b83d target_reinit() .../__RISC-V_BUILD__/../gcc/toplev.cc:1936 0x35e374d save_target_globals() .../__RISC-V_BUILD__/../gcc/target-globals.cc:92 0x35e381f save_target_globals_default_opts() .../__RISC-V_BUILD__/../gcc/target-globals.cc:122 0x1f544cc riscv_save_restore_target_globals(tree_node*) .../__RISC-V_BUILD__/../gcc/config/riscv/riscv.cc:9138 0x1f55c36 riscv_set_current_function ... There are two reasons for this ICE. 1. The implied extension(s) of v are not well handled and the TARGET_MIN_VLEN is 0 which is not reinitialized. Then the size / TARGET_MIN_VLEN will have DivideByZero. 2. The machine modes of the vector types will be vary after the v extension is introduced. This patch passed below testsuite: 1. The riscv fully regression test. PR target/114352 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::parse): Replace implied, combine and check to func finalize. 
(riscv_subset_list::finalize): New func impl to take care of implied, combine ext and related checks. * config/riscv/riscv-subset.h: Add func decl for finalize. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Finalize the ext before return succeed. * config/riscv/riscv.cc (riscv_set_current_function): Reinit the machine mode before when set cur function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114352-1.c: New test. * gcc.target/riscv/rvv/base/pr114352-2.c: New test. Signed-off-by: Pan Li Diff: --- gcc/common/config/riscv/riscv-common.cc| 31 gcc/config/riscv/riscv-subset.h| 2 + gcc/config/riscv/riscv-target-attr.cc | 2 + gcc/config/riscv/riscv.cc | 4 ++ .../gcc.target/riscv/rvv/base/pr114352-1.c | 58 ++ .../gcc.target/riscv/rvv/base/pr114352-2.c | 27 ++ 6 files changed, 114 insertions(+), 10 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 440127a2af0..15d44245b3c 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -1428,16 +1428,7 @@ riscv_subset_list::parse (const char *arch, location_t loc) if (p == NULL) goto fail; - for (itr = subset_list->m_head; itr != NULL; itr = itr->next) -{ - subset_list->handle_implied_ext (itr->name.c_str ()); -} - - /* Make sure all implied extensions are included. */ - gcc_assert (subset_list->check_implied_ext ()); - - subset_list->handle_combine_ext (); - subset_list->check_conflict_ext (); + subset_list->finalize (); return subset_list; @@ -1467,6 +1458,26 @@ riscv_subset_list::set_loc (location_t loc) m_loc = loc; } +/* Make sure the implied or combined extension is included after add + a new std extension to subset list or likewise. For exmaple as below, + + void __attribute__((target("arch=+v"))) func () with -march=rv64gc. + + The implied zvl128b and zve64d of the std v should be included. 
*/ +void +riscv_subset_list::finalize () +{ + riscv_subset_t *subset; + + for (subset = m_head; subset != NULL; subset = subset->next) +handle_implied_ext (subset->name.c_str ()); + + gcc_assert (check_implied_ext ()); + + handle_combine_ext (); + check_conflict_ext (); +} + /* Return the current arch string. */ std::string diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index ae849e2a302.
[gcc r14-9605] RISC-V: Bugfix function target attribute pollution
https://gcc.gnu.org/g:9941f0295a14659e25260458efd2e46a68ad0342 commit r14-9605-g9941f0295a14659e25260458efd2e46a68ad0342 Author: Pan Li Date: Tue Mar 19 09:43:24 2024 +0800 RISC-V: Bugfix function target attribute pollution This patch depends on below ICE fix. https://gcc.gnu.org/pipermail/gcc-patches/2024-March/647915.html The function target attribute should be on a per-function basis. For example, we have 4 functions as below: void test_1 () {} void __attribute__((target("arch=+v"))) test_2 () {} void __attribute__((target("arch=+zfh"))) test_3 () {} void test_4 () {} The scope of the target attribute should not extend beyond the function body. Aka, test_3 cannot have the 'v' extension, as well as the test_4 cannot have both the 'v' and 'zfh' extension. Unfortunately, for now the test_4 is able to leverage the 'v' and the 'zfh' extension which is incorrect. This patch would like to fix the sticking attribute by introducing the command-line subset_list. When parse_arch, we always clone from the cmdline_subset_list instead of the current_subset_list. Meanwhile, we correct the print information about arch like below. .option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zbb1p0 The riscv_declare_function_name hook is always after the hook riscv_process_target_attr. Thus, we introduce one hash_map to record the 1:1 mapping from fndecl to its subset_list in advance. And later the riscv_declare_function_name is able to get the right information about the arch. The tests below passed for this patch * The riscv fully regression test. PR target/114352 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (struct riscv_func_target_info): New struct for func decl and target name. (struct riscv_func_target_hasher): New hasher for hash table mapping from the fn_decl to fn_target_name. (riscv_func_decl_hash): New func to compute the hash for fn_decl. (riscv_func_target_hasher::hash): New func to impl hash interface. 
(riscv_func_target_hasher::equal): New func to impl equal interface. (riscv_cmdline_subset_list): New static var for cmdline subset list. (riscv_func_target_table_lazy_init): New func to lazy init the func target hash table. (riscv_func_target_get): New func to get target name from hash table. (riscv_func_target_put): New func to put target name into hash table. (riscv_func_target_remove_and_destory): New func to remove target info from the hash table and destory it. (riscv_parse_arch_string): Set the static var cmdline_subset_list. * config/riscv/riscv-subset.h (riscv_cmdline_subset_list): New static var for cmdline subset list. (riscv_func_target_get): New func decl. (riscv_func_target_put): Ditto. (riscv_func_target_remove_and_destory): Ditto. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Take cmdline_subset_list instead of current_subset_list when clone. (riscv_process_target_attr): Record the func target info to hash table. (riscv_option_valid_attribute_p): Add new arg tree fndel. * config/riscv/riscv.cc (riscv_declare_function_name): Consume the func target info and print the arch message. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr114352-3.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/common/config/riscv/riscv-common.cc| 105 ++- gcc/config/riscv/riscv-subset.h| 4 + gcc/config/riscv/riscv-target-attr.cc | 18 +++- gcc/config/riscv/riscv.cc | 7 +- .../gcc.target/riscv/rvv/base/pr114352-3.c | 113 + 5 files changed, 240 insertions(+), 7 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 15d44245b3c..7095f303cbb 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -426,11 +426,108 @@ bool riscv_subset_list::parse_failed = false; static riscv_subset_list *current_subset_list = NULL; +static riscv_subset_list *cmdline_subset_list = NULL; + +struct riscv_func_target_info +{ + tree fn_decl; + std::string fn_target_name; + + riscv_func_target_info (const tree &decl, const std::string &target_name) +: fn_decl (decl), fn_target_name (target_name) + { + } +}; + +struct riscv_func_target_hasher : nofree_ptr_hash +{ + typedef tree compare_type; + + static hashval_t hash (value_type); + static bool equal (value_type, const compare_type &); +}; + +static hash_table *f
[gcc r14-9606] [committed] Fix RISC-V missing stack tie
https://gcc.gnu.org/g:c65046ff2ef0a9a46e59bc0b3369b2d226f6a239 commit r14-9606-gc65046ff2ef0a9a46e59bc0b3369b2d226f6a239 Author: Jeff Law Date: Thu Mar 21 20:41:59 2024 -0600 [committed] Fix RISC-V missing stack tie As some of you know, Raphael has been working on stack-clash support for the RISC-V port. A little while ago Florian reached out to us with an issue where glibc was failing its smoke test due to referencing an unallocated stack slot. Without diving into the code in detail I (incorrectly) concluded it was a problem with the fallback of using Ada's stack-check paths due to not having stack-clash support. Once enough stack-clash bits were ready I had Raphael review the code generated for Florian's test and we concluded that the original case from Florian was just wrong irrespective of stack clash/stack check. While Raphael's stack-clash work will indirectly fix Florian's case, it really should also work without stack-clash. In particular this code was called out by valgrind: > 0003cb5e : > __GI___realpath(): >3cb5e: 81010113addisp,sp,-2032 >3cb62: 7d313423sd s3,1992(sp) >3cb66: 79fdlui s3,0xf >3cb68: 7e813023sd s0,2016(sp) >3cb6c: 7c913c23sd s1,2008(sp) >3cb70: 7f010413addis0,sp,2032 >3cb74: 35098793addia5,s3,848 # f350 <__libc_initial+0xffe8946a> >3cb78: 74fdlui s1,0xf >3cb7a: 008789b3add s3,a5,s0 >3cb7e: f9048793addia5,s1,-112 # ef90 <__libc_initial+0xffe890aa> >3cb82: 008784b3add s1,a5,s0 >3cb86: 77fdlui a5,0xf >3cb88: 7d413023sd s4,1984(sp) >3cb8c: 7b513c23sd s5,1976(sp) >3cb90: 7e113423sd ra,2024(sp) >3cb94: 7d213823sd s2,2000(sp) >3cb98: 7b613823sd s6,1968(sp) >3cb9c: 7b713423sd s7,1960(sp) >3cba0: 7b813023sd s8,1952(sp) >3cba4: 79913c23sd s9,1944(sp) >3cba8: 79a13823sd s10,1936(sp) >3cbac: 79b13423sd s11,1928(sp) >3cbb0: 34878793addia5,a5,840 # f348 <__libc_initial+0xffe89462> >3cbb4: 4713li a4,1024 >3cbb8: 00132a17auipc s4,0x132 >3cbbc: ae0a3a03ld s4,-1312(s4) # 16e698 <__stack_chk_guard> >3cbc0: 01098893addia7,s3,16 >3cbc4: 42098693addia3,s3,1056 
>3cbc8: b8040a93addis5,s0,-1152 >3cbcc: 97a2add a5,a5,s0 >3cbce: 000a3603ld a2,0(s4) >3cbd2: f8c43423sd a2,-120(s0) >3cbd6: 4601li a2,0 >3cbd8: 3d14b023sd a7,960(s1) >3cbdc: 3ce4b423sd a4,968(s1) >3cbe0: 7cd4b823sd a3,2000(s1) >3cbe4: 7ce4bc23sd a4,2008(s1) >3cbe8: b7543823sd s5,-1168(s0) >3cbec: b6e43c23sd a4,-1160(s0) >3cbf0: e38csd a1,0(a5) >3cbf2: b0010113addisp,sp,-1280 In particular note the store at 0x3cbd8. That's hitting (s1 + 960). If you chase the values around, you'll find it's a bit more than 1k into unallocated stack space. It's also worth noting the final stack adjustment at 0x3cbf2. While I haven't reproduced Florian's code exactly, I was able to get reasonably close and verify my suspicion that everything was fine before sched2 and incorrect after sched2. It was also obvious at that point what had gone wrong -- we were missing a stack tie after the final stack pointer adjustment. This patch adds the missing stack tie. While not technically a regression, I shudder at the thought of chasing one of these issues down again in the wild. Been there, done that. Regression tested on rv64gc. Verified the scheduler no longer mucked up realpath by hand. Pushing to the trunk. gcc/ * config/riscv/riscv.cc (riscv_expand_prologue): Add missing stack
[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.
https://gcc.gnu.org/g:6ff874d066d523bd6b71e2f944f5740f651ed022 commit 6ff874d066d523bd6b71e2f944f5740f651ed022 Author: Michael Meissner Date: Thu Mar 21 23:39:11 2024 -0400 Add support for XVRL instruction. 2024-03-21 Michael Meissner gcc/ * config/rs6000/altivec.md (xvrlw): New insn. Diff: --- gcc/config/rs6000/altivec.md | 11 +++ 1 file changed, 11 insertions(+) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 4d4c94ff0a0..da5db49d3af 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1883,6 +1883,17 @@ } [(set_attr "type" "vecperm")]) +;; Future cpu adds a vector rotate left word variant +(define_insn "*xvrlw" + [(set (match_operand:V4SI 0 "register_operand" "=v,wa") + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") +(match_operand:V4SI 2 "register_operand" "v,wa")))] + "TARGET_FUTURE" + "@ + vrlw %0,%1,%2 + xvrlw %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes
https://gcc.gnu.org/g:d49aa664ce768f629c858158eca406991a66da85 commit d49aa664ce768f629c858158eca406991a66da85 Author: Michael Meissner Date: Thu Mar 21 23:40:14 2024 -0400 Revert all changes Diff: --- gcc/config/rs6000/altivec.md | 11 --- 1 file changed, 11 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index da5db49d3af..4d4c94ff0a0 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1883,17 +1883,6 @@ } [(set_attr "type" "vecperm")]) -;; Future cpu adds a vector rotate left word variant -(define_insn "*xvrlw" - [(set (match_operand:V4SI 0 "register_operand" "=v,wa") - (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") -(match_operand:V4SI 2 "register_operand" "v,wa")))] - "TARGET_FUTURE" - "@ - vrlw %0,%1,%2 - xvrlw %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - (define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.
https://gcc.gnu.org/g:145bc7b00c10cf6e59897aba7f61c3a24c85ca0b commit 145bc7b00c10cf6e59897aba7f61c3a24c85ca0b Author: Michael Meissner Date: Thu Mar 21 23:49:02 2024 -0400 Add support for XVRL instruction. 2024-03-21 Michael Meissner gcc/ * config/rs6000/altivec.md (*future_xvrlw): New insn. Diff: --- gcc/config/rs6000/altivec.md | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 4d4c94ff0a0..afe3d72316c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1883,6 +1883,20 @@ } [(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl and will match for -mcpu=future, while other cpus will +;; match the generic insn. +(define_insn "*future_xvrlw" + [(set (match_operand:V4SI 0 "register_operand" "=v,wa") + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") +(match_operand:V4SI 2 "register_operand" "v,wa")))] + "TARGET_FUTURE" + "@ + vrlw %0,%1,%2 + xvrlw %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes
https://gcc.gnu.org/g:889e9e1165b5f84cf84820ca9b1926548941aded commit 889e9e1165b5f84cf84820ca9b1926548941aded Author: Michael Meissner Date: Thu Mar 21 23:59:52 2024 -0400 Revert all changes Diff: --- gcc/config/rs6000/altivec.md | 14 - gcc/config/rs6000/constraints.md | 3 - gcc/config/rs6000/mma.md | 410 +- gcc/config/rs6000/predicates.md | 32 -- gcc/config/rs6000/rs6000-builtin.cc | 22 +- gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-cpus.def | 2 - gcc/config/rs6000/rs6000-modes.def| 4 - gcc/config/rs6000/rs6000.cc | 318 - gcc/config/rs6000/rs6000.h| 50 +-- gcc/config/rs6000/rs6000.md | 2 - gcc/doc/md.texi | 5 - gcc/testsuite/gcc.target/powerpc/dm-1024bit.c | 63 gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 -- gcc/testsuite/lib/target-supports.exp | 23 -- 15 files changed, 166 insertions(+), 986 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index afe3d72316c..4d4c94ff0a0 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1883,20 +1883,6 @@ } [(set_attr "type" "vecperm")]) -;; -mcpu=future adds a vector rotate left word variant. There is no vector -;; byte/half-word/double-word/quad-word rotate left. This insn occurs before -;; altivec_vrl and will match for -mcpu=future, while other cpus will -;; match the generic insn. 
-(define_insn "*future_xvrlw" - [(set (match_operand:V4SI 0 "register_operand" "=v,wa") - (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") -(match_operand:V4SI 2 "register_operand" "v,wa")))] - "TARGET_FUTURE" - "@ - vrlw %0,%1,%2 - xvrlw %x0,%x1,%x2" - [(set_attr "type" "vecsimple")]) - (define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 277a30a8245..369a7b75042 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,9 +107,6 @@ (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) -(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" - "Accumulator register.") - (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." (match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 4f9c59046ea..04e2d0066df 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -91,11 +91,6 @@ UNSPEC_MMA_XVI8GER4SPP UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC - UNSPEC_DM_INSERT512_UPPER - UNSPEC_DM_INSERT512_LOWER - UNSPEC_DM_EXTRACT512 - UNSPEC_DMR_RELOAD_FROM_MEMORY - UNSPEC_DMR_RELOAD_TO_MEMORY ]) (define_c_enum "unspecv" @@ -229,47 +224,44 @@ (UNSPEC_MMA_XVF64GERNP "xvf64gernp") (UNSPEC_MMA_XVF64GERNN "xvf64gernn")]) -;; The "pm" prefix is not in these expansions, so that we can generate -;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems -;; without dense math registers. 
-(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) +(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) -(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")]) +(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) -(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2") -(UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s") -(UNSPEC_MMA_PMXVF16GER2"xvf16ger2") -(UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")]) +(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2") +(UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") +(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2") +(UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) -(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp") -(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp") -(UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp") -(UNSPEC_MMA_PMXVF16GER2PN "xvf16ger2pn") -(UNSPEC_MMA_PMXVF16GER2NP "xvf16ger2np") -(UNSPEC_MMA_PMXVF16GER2NN "xv
[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.
https://gcc.gnu.org/g:935c71424570d5e85a85edb9a1516c3ef578a6f8 commit 935c71424570d5e85a85edb9a1516c3ef578a6f8 Author: Michael Meissner Date: Fri Mar 22 00:14:19 2024 -0400 Add wD constraint. This patch adds a new constraint ('wD') that matches the accumulator registers that overlap with VSX registers 0..31 on power10. Future patches will add the support for a separate accumulator register class that will be used when the support for dense math registers is added. 2024-03-22 Michael Meissner * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_disassemble_acc): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] + [(set (match_operand:XO 0 "accumulator_operand" "=&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] MMA_ACC))] "TARGET_MMA" " %A0" @@ -515,7 +513,7 @@ ;; UNSPEC_VOLATILE. 
(define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "fpr_reg_operand" "=d") + [(set (match_operand:XO 0 "accumulator_operand" "=wD") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" @@ -523,7 +521,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -532,8 +530,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -542,7 +540,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -551,8 +549,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -561,7 +559,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" 
"v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -574,8 +572,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -588,7 +586,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -601,8 +599,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"
[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes
https://gcc.gnu.org/g:8ed4af0366a9b0d8670b96dfba25b85d3ba9b65c commit 8ed4af0366a9b0d8670b96dfba25b85d3ba9b65c Author: Michael Meissner Date: Fri Mar 22 00:16:08 2024 -0400 Revert all changes Diff: --- gcc/config/rs6000/constraints.md | 3 --- gcc/config/rs6000/mma.md | 54 --- gcc/config/rs6000/predicates.md | 15 --- gcc/config/rs6000/rs6000-c.cc | 9 ++- gcc/config/rs6000/rs6000-cpus.def | 2 -- gcc/config/rs6000/rs6000.cc | 7 + gcc/config/rs6000/rs6000.h| 1 - gcc/doc/md.texi | 5 8 files changed, 31 insertions(+), 65 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 277a30a8245..369a7b75042 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,9 +107,6 @@ (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) -(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" - "Accumulator register.") - (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." (match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 49cf5f8fe43..04e2d0066df 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -452,7 +452,8 @@ (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] UNSPECV_MMA_ASSEMBLE))] - "TARGET_MMA" + "TARGET_MMA + && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" [(const_int 0)] @@ -485,7 +486,8 @@ (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") (match_operand 2 "const_0_to_3_operand")] UNSPEC_MMA_EXTRACT))] - "TARGET_MMA" + "TARGET_MMA + && fpr_reg_operand (operands[1], XOmode)" "#" "&& reload_completed" [(const_int 0)] @@ -502,8 +504,8 @@ ;; the accumulator. We enforce this by marking the output as early clobber. 
(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] MMA_ACC))] "TARGET_MMA" " %A0" @@ -513,7 +515,7 @@ ;; UNSPEC_VOLATILE. (define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "accumulator_operand" "=wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=d") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" @@ -521,7 +523,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -530,8 +532,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -540,7 +542,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -549,8 +551,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 
"vsx_register_operand" "v,?wa")] MMA_APV))] @@ -559,7 +561,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -572,8 +574,8 @@ (set_attr "prefixed" "yes")])
[gcc(refs/users/meissner/heads/work163-dmf)] Use vector pair load/store for memcpy with -mcpu=future
https://gcc.gnu.org/g:e9f36e1c173a1967318ccfc2341e46b526d6f0aa commit e9f36e1c173a1967318ccfc2341e46b526d6f0aa Author: Michael Meissner Date: Fri Mar 22 00:16:39 2024 -0400 Use vector pair load/store for memcpy with -mcpu=future In the development for the power10 processor, GCC did not enable using the load vector pair and store vector pair instructions when optimizing things like memory copy. This patch enables using those instructions if -mcpu=future is used. 2024-03-22 Michael Meissner gcc/ * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable using load vector pair and store vector pair instructions for memory copy operations. (POWERPC_MASKS): Make the bit for enabling using load vector pair and store vector pair operations set and reset when the PowerPC processor is changed. Diff: --- gcc/config/rs6000/rs6000-cpus.def | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 47365534af8..4ddba142e44 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -90,6 +90,7 @@ | OPTION_MASK_POWER11) #define ISA_FUTURE_MASKS_SERVER(ISA_POWER11_MASKS_SERVER \ +| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\ | OPTION_MASK_FUTURE) /* Flags that need to be turned off if -mno-vsx. */ @@ -121,6 +122,7 @@ /* Mask of all options to set the default isa flags based on -mcpu=. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC\ +| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \
[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.
https://gcc.gnu.org/g:ca9dad30fef736a655999cb4ab1cbe1d8cdd20f1 commit ca9dad30fef736a655999cb4ab1cbe1d8cdd20f1 Author: Michael Meissner Date: Fri Mar 22 00:19:46 2024 -0400 Add wD constraint. This patch adds a new constraint ('wD') that matches the accumulator registers that overlap with VSX registers 0..31 on power10. Future patches will add the support for a separate accumulator register class that will be used when the support for dense math registers is added. 2024-03-22 Michael Meissner * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_disassemble_acc): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] + [(set (match_operand:XO 0 "accumulator_operand" "=&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] MMA_ACC))] "TARGET_MMA" " %A0" @@ -515,7 +513,7 @@ ;; UNSPEC_VOLATILE. 
(define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "fpr_reg_operand" "=d") + [(set (match_operand:XO 0 "accumulator_operand" "=wD") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" @@ -523,7 +521,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -532,8 +530,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -542,7 +540,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -551,8 +549,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -561,7 +559,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" 
"v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -574,8 +572,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -588,7 +586,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -601,8 +599,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"
[gcc(refs/users/meissner/heads/work163-dmf)] Revert all changes
https://gcc.gnu.org/g:90b5e76409a32556b7672139d6a2d031d0d5937e commit 90b5e76409a32556b7672139d6a2d031d0d5937e Author: Michael Meissner Date: Fri Mar 22 00:21:34 2024 -0400 Revert all changes Diff: --- gcc/config/rs6000/constraints.md | 3 --- gcc/config/rs6000/mma.md | 54 --- gcc/config/rs6000/predicates.md | 15 --- gcc/config/rs6000/rs6000-c.cc | 9 ++- gcc/config/rs6000/rs6000-cpus.def | 2 -- gcc/config/rs6000/rs6000.cc | 7 + gcc/config/rs6000/rs6000.h| 1 - gcc/doc/md.texi | 5 8 files changed, 31 insertions(+), 65 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 277a30a8245..369a7b75042 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,9 +107,6 @@ (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) -(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" - "Accumulator register.") - (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." (match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 49cf5f8fe43..04e2d0066df 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -452,7 +452,8 @@ (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] UNSPECV_MMA_ASSEMBLE))] - "TARGET_MMA" + "TARGET_MMA + && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" [(const_int 0)] @@ -485,7 +486,8 @@ (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") (match_operand 2 "const_0_to_3_operand")] UNSPEC_MMA_EXTRACT))] - "TARGET_MMA" + "TARGET_MMA + && fpr_reg_operand (operands[1], XOmode)" "#" "&& reload_completed" [(const_int 0)] @@ -502,8 +504,8 @@ ;; the accumulator. We enforce this by marking the output as early clobber. 
(define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] MMA_ACC))] "TARGET_MMA" " %A0" @@ -513,7 +515,7 @@ ;; UNSPEC_VOLATILE. (define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "accumulator_operand" "=wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=d") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" @@ -521,7 +523,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -530,8 +532,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -540,7 +542,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -549,8 +551,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 
"vsx_register_operand" "v,?wa")] MMA_APV))] @@ -559,7 +561,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -572,8 +574,8 @@ (set_attr "prefixed" "yes")])
[gcc(refs/users/meissner/heads/work163-dmf)] Use vector pair load/store for memcpy with -mcpu=future
https://gcc.gnu.org/g:385b09d7c32c2cb7a3d16f738e870d1d0bad6997 commit 385b09d7c32c2cb7a3d16f738e870d1d0bad6997 Author: Michael Meissner Date: Fri Mar 22 00:22:20 2024 -0400 Use vector pair load/store for memcpy with -mcpu=future In the development for the power10 processor, GCC did not enable using the load vector pair and store vector pair instructions when optimizing things like memory copy. This patch enables using those instructions if -mcpu=future is used. 2024-03-22 Michael Meissner gcc/ * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable using load vector pair and store vector pair instructions for memory copy operations. (POWERPC_MASKS): Make the bit for enabling using load vector pair and store vector pair operations set and reset when the PowerPC processor is changed. Diff: --- gcc/config/rs6000/rs6000-cpus.def | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 47365534af8..4ddba142e44 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -90,6 +90,7 @@ | OPTION_MASK_POWER11) #define ISA_FUTURE_MASKS_SERVER(ISA_POWER11_MASKS_SERVER \ +| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\ | OPTION_MASK_FUTURE) /* Flags that need to be turned off if -mno-vsx. */ @@ -121,6 +122,7 @@ /* Mask of all options to set the default isa flags based on -mcpu=. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC\ +| OPTION_MASK_BLOCK_OPS_VECTOR_PAIR\ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \
[gcc(refs/users/meissner/heads/work163-dmf)] Add wD constraint.
https://gcc.gnu.org/g:1aef3129fb903b6ca80746e0fcffdcf2c86728ee commit 1aef3129fb903b6ca80746e0fcffdcf2c86728ee Author: Michael Meissner Date: Fri Mar 22 00:22:56 2024 -0400 Add wD constraint. This patch adds a new constraint ('wD') that matches the accumulator registers that overlap with VSX registers 0..31 on power10. Future patches will add the support for a separate accumulator register class that will be used when the support for dense math registers is added. 2024-03-22 Michael Meissner * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_disassemble_acc): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] + [(set (match_operand:XO 0 "accumulator_operand" "=&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] MMA_ACC))] "TARGET_MMA" " %A0" @@ -515,7 +513,7 @@ ;; UNSPEC_VOLATILE. 
(define_insn "mma_xxsetaccz" - [(set (match_operand:XO 0 "fpr_reg_operand" "=d") + [(set (match_operand:XO 0 "accumulator_operand" "=wD") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] "TARGET_MMA" @@ -523,7 +521,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -532,8 +530,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -542,7 +540,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -551,8 +549,8 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -561,7 +559,7 @@ [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" 
"v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -574,8 +572,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -588,7 +586,7 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -601,8 +599,8 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0"
[gcc(refs/users/meissner/heads/work163-dmf)] Add support for dense math registers.
https://gcc.gnu.org/g:732fcb3a20b89b6cf405c4f7131c82de6bde8303 commit 732fcb3a20b89b6cf405c4f7131c82de6bde8303 Author: Michael Meissner Date: Fri Mar 22 00:27:47 2024 -0400 Add support for dense math registers. The MMA subsystem added the notion of accumulator registers as an optional feature of ISA 3.1 (power10). In ISA 3.1, these accumulators overlapped with the VSX registers 0..31, but logically the accumulator registers were separate from the FPR registers. In ISA 3.1, it was anticipated that in future systems, the accumulator registers may not overlap with the FPR registers. This patch adds the support for dense math registers as separate registers. This particular patch does not change the MMA support to use the accumulators within the dense math registers. This patch just adds the basic support for having separate DMRs. The next patch will switch the MMA support to use the accumulators if -mcpu=future is used. For testing purposes, I added an undocumented option '-mdense-math' to enable or disable the dense math support. This patch adds a new constraint (wD). If MMA is selected but dense math is not selected (i.e. -mcpu=power10), the wD constraint will allow access to accumulators that overlap with VSX registers 0..31. If both MMA and dense math are selected (i.e. -mcpu=future), the wD constraint will only allow dense math registers. This patch modifies the existing %A output modifier. If MMA is selected but dense math is not selected, then %A output modifier converts the VSX register number to the accumulator number, by dividing it by 4. If both MMA and dense math are selected, then %A will map the separate DMR registers into 0..7. 
The intention is that user code using extended asm can be modified to run on both MMA without dense math and MMA with dense math: 1) If possible, don't use extended asm, but instead use the MMA built-in functions; 2) If you do need to write extended asm, the d constraints targeting accumulators should now use wD; 3) Only use the built-in zero, assemble and disassemble functions to move data between vector quad types and dense math accumulators. I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the extended asm code. The reason is these instructions assume there is a 1-to-1 correspondence between 4 adjacent FPR registers and an accumulator that overlaps with those registers. With accumulators now being separate registers, there no longer is a 1-to-1 correspondence. It is possible that the mangling for DMRs and the GDB register numbers may produce other changes in the future. 2024-03-22 Michael Meissner * config/rs6000/mma.md (movxo): Add comments about dense math registers. (movxo_nodm): Rename from movxo and restrict the usage to machines without dense math registers. (movxo_dm): New insn for movxo support for machines with dense math registers. (mma_): Restrict usage to machines without dense math registers. (mma_xxsetaccz): Make a define_expand, and add support for dense math registers. (mma_xxsetaccz_nodm): Rename from mma_xxsetaccz, and restrict to machines without dense math registers. (mma_dmsetaccz): New insn. * config/rs6000/predicates.md (dmr_operand): New predicate. (accumulator_operand): Add support for dense math registers. * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): Do not de-prime accumulator when disassembling a vector quad. * config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): Define __DENSE_MATH__ if we have dense math registers. * config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE. (enum rs6000_reload_reg_type): Add RELOAD_REG_DMR. 
(LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD constraint. (reload_reg_map): Likewise. (rs6000_reg_names): Likewise. (alt_reg_names): Likewise. (rs6000_hard_regno_nregs_internal): Likewise. (rs6000_hard_regno_mode_ok_uncached): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_memory): Add support for DMR registers. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_secondary_reload_class): Likewise. (print_operand): Make %A handle both FPRs and
[gcc(refs/users/meissner/heads/work163-dmf)] PowerPC: Switch to dense math names for all MMA operations.
https://gcc.gnu.org/g:f2c0b60fd88b3108e4225a2dceac25832d8537b5 commit f2c0b60fd88b3108e4225a2dceac25832d8537b5 Author: Michael Meissner Date: Fri Mar 22 00:29:03 2024 -0400 PowerPC: Switch to dense math names for all MMA operations. This patch changes the assembler instruction names for MMA instructions from the original name used in power10 to the new name when used with the dense math system. I.e. xvf64gerpp becomes dmxvf64gerpp. The assembler will emit the same bits for either spelling. For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the instruction. However, the prefixed instructions have a 'pm' prefix, and we add the 'dm' prefix afterwards. To prevent having two sets of parallel int attributes, we remove the "pm" prefix from the instruction string in the attributes, and add it later, both in the insn name and in the output template. 2024-03-22 Michael Meissner gcc/ * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a "pm" prefix. (avvi4i4i8): Likewise. (vvi4i4i2): Likewise. (avvi4i4i2): Likewise. (vvi4i4): Likewise. (avvi4i4): Likewise. (pvi4i2): Likewise. (apvi4i2): Likewise. (vvi4i4i4): Likewise. (avvi4i4i4): Likewise. (mma_xxsetaccz): Add support for running on DMF systems, generating the dense math instruction and using the dense math accumulators. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_): Likewise. (mma_pm): Add support for running on DMF systems, generating the dense math instruction and using the dense math accumulators. Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm' prefixes based on whether we have the original MMA specification or if we have dense math support. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. (mma_pm): Likewise. 
Diff: --- gcc/config/rs6000/mma.md | 161 +++ 1 file changed, 107 insertions(+), 54 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 2ce613b46cc..f3870eac51a 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -224,44 +224,47 @@ (UNSPEC_MMA_XVF64GERNP "xvf64gernp") (UNSPEC_MMA_XVF64GERNN "xvf64gernn")]) -(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) +;; The "pm" prefix is not in these expansions, so that we can generate +;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems +;; without dense math registers. +(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) -(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) +(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")]) -(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2"pmxvi16ger2") -(UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") -(UNSPEC_MMA_PMXVF16GER2"pmxvf16ger2") -(UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) +(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2"xvi16ger2") +(UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s") +(UNSPEC_MMA_PMXVF16GER2"xvf16ger2") +(UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")]) -(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") -(UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp") -(UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp") -(UNSPEC_MMA_PMXVF16GER2PN "pmxvf16ger2pn") -(UNSPEC_MMA_PMXVF16GER2NP "pmxvf16ger2np") -(UNSPEC_MMA_PMXVF16GER2NN "pmxvf16ger2nn") -(UNSPEC_MMA_PMXVBF16GER2PP "pmxvbf16ger2pp") -(UNSPEC_MMA_PMXVBF16GER2PN "pmxvbf16ger2pn") -(UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np") -(UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")]) +(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp") +(UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp") +(UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp") +(UNSPE
[gcc(refs/users/meissner/heads/work163-dmf)] Add dense math test for new instruction names.
https://gcc.gnu.org/g:cf80b10c5a0b0e109c4d00404e03ed6f3c0606c0 commit cf80b10c5a0b0e109c4d00404e03ed6f3c0606c0 Author: Michael Meissner Date: Fri Mar 22 00:31:11 2024 -0400 Add dense math test for new instruction names. 2024-03-22 Michael Meissner gcc/testsuite/ * gcc.target/powerpc/dm-double-test.c: New test. * lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New target test. Diff: --- gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ++ gcc/testsuite/lib/target-supports.exp | 23 +++ 2 files changed, 217 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c new file mode 100644 index 000..66c19779585 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c @@ -0,0 +1,194 @@ +/* Test derived from mma-double-1.c, modified for dense math. */ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_dense_math_ok } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +#include +#include +#include + +typedef unsigned char vec_t __attribute__ ((vector_size (16))); +typedef double v4sf_t __attribute__ ((vector_size (16))); +#define SAVE_ACC(ACC, ldc, J) \ + __builtin_mma_disassemble_acc (result, ACC); \ + rowC = (v4sf_t *) &CO[0*ldc+J]; \ + rowC[0] += result[0]; \ + rowC = (v4sf_t *) &CO[1*ldc+J]; \ + rowC[0] += result[1]; \ + rowC = (v4sf_t *) &CO[2*ldc+J]; \ + rowC[0] += result[2]; \ + rowC = (v4sf_t *) &CO[3*ldc+J]; \ + rowC[0] += result[3]; + +void +DM (int m, int n, int k, double *A, double *B, double *C) +{ + __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; + v4sf_t result[4]; + v4sf_t *rowC; + for (int l = 0; l < n; l += 4) +{ + double *CO; + double *AO; + AO = A; + CO = C; + C += m * 4; + for (int j = 0; j < m; j += 16) + { + double *BO = B; + __builtin_mma_xxsetaccz (&acc0); + __builtin_mma_xxsetaccz (&acc1); + __builtin_mma_xxsetaccz (&acc2); + __builtin_mma_xxsetaccz (&acc3); + __builtin_mma_xxsetaccz (&acc4); + 
__builtin_mma_xxsetaccz (&acc5); + __builtin_mma_xxsetaccz (&acc6); + __builtin_mma_xxsetaccz (&acc7); + unsigned long i; + + for (i = 0; i < k; i++) + { + vec_t *rowA = (vec_t *) & AO[i * 16]; + __vector_pair rowB; + vec_t *rb = (vec_t *) & BO[i * 4]; + __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); + __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); + __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); + __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); + __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]); + __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]); + __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]); + __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]); + __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]); + } + SAVE_ACC (&acc0, m, 0); + SAVE_ACC (&acc2, m, 4); + SAVE_ACC (&acc1, m, 2); + SAVE_ACC (&acc3, m, 6); + SAVE_ACC (&acc4, m, 8); + SAVE_ACC (&acc6, m, 12); + SAVE_ACC (&acc5, m, 10); + SAVE_ACC (&acc7, m, 14); + AO += k * 16; + BO += k * 4; + CO += 16; + } + B += k * 4; +} +} + +void +init (double *matrix, int row, int column) +{ + for (int j = 0; j < column; j++) +{ + for (int i = 0; i < row; i++) + { + matrix[j * row + i] = (i * 16 + 2 + j) / 0.123; + } +} +} + +void +init0 (double *matrix, double *matrix1, int row, int column) +{ + for (int j = 0; j < column; j++) +for (int i = 0; i < row; i++) + matrix[j * row + i] = matrix1[j * row + i] = 0; +} + + +void +print (const char *name, const double *matrix, int row, int column) +{ + printf ("Matrix %s has %d rows and %d columns:\n", name, row, column); + for (int i = 0; i < row; i++) +{ + for (int j = 0; j < column; j++) + { + printf ("%f ", matrix[j * row + i]); + } + printf ("\n"); +} + printf ("\n"); +} + +int +main (int argc, char *argv[]) +{ + int rowsA, colsB, common; + int i, j, k; + int ret = 0; + + for (int t = 16; t <= 128; t += 16) +{ + for (int t1 = 4; t1 <= 16; t1 += 4) + { + rowsA = t; + colsB = t1; + common = 1; + /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */ + double A[rowsA * 
common]; + double B[common * colsB]; + double C[rowsA * colsB]; + double D[rowsA * colsB]; + + + init (A, rowsA, common); + init (B, common, colsB); + init0 (C, D, rowsA, colsB); + DM (rowsA, colsB, common, A, B, C); + +
[gcc(refs/users/meissner/heads/work163-dmf)] PowerPC: Add support for 1,024 bit DMR registers.
https://gcc.gnu.org/g:365106aa952ceba51092df38086d212802b1fb5a commit 365106aa952ceba51092df38086d212802b1fb5a Author: Michael Meissner Date: Fri Mar 22 00:33:23 2024 -0400 PowerPC: Add support for 1,024 bit DMR registers. This patch is a preliminary patch to add the full 1,024 bit dense math registers (DMRs) for -mcpu=future. The MMA 512-bit accumulators map onto the top of the DMR register. This patch only adds the new 1,024 bit register support. It does not add support for any instructions that need 1,024 bit registers instead of 512 bit registers. I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit registers. The 'wD' constraint added in previous patches is used for these registers. I added support to do load and store of DMRs via the VSX registers, since there are no load/store dense math instructions. I added the new keyword '__dmr' to create 1,024 bit types that can be loaded into DMRs. At present, I don't have aliases for __dmr512 and __dmr1024 that we've discussed internally. The patches have been tested on both little and big endian systems. Can I check it into the master branch? 2024-03-22 Michael Meissner gcc/ * config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec. (UNSPEC_DM_INSERT512_LOWER): Likewise. (UNSPEC_DM_EXTRACT512): Likewise. (UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise. (UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise. (movtdo): New define_expand and define_insn_and_split to implement 1,024 bit DMR registers. (movtdo_insert512_upper): New insn. (movtdo_insert512_lower): Likewise. (movtdo_extract512): Likewise. (reload_dmr_from_memory): Likewise. (reload_dmr_to_memory): Likewise. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR support. (rs6000_init_builtins): Add support for __dmr keyword. * config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add support for TDOmode. (rs6000_function_arg): Likewise. * config/rs6000/rs6000-modes.def (TDOmode): New mode. 
* config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add support for TDOmode. (rs6000_hard_regno_mode_ok_uncached): Likewise. (rs6000_hard_regno_mode_ok): Likewise. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Add support for TDOmode. Setup reload hooks for DMR mode. (reg_offset_addressing_ok_p): Add support for TDOmode. (rs6000_emit_move): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_secondary_reload_class): Likewise. (rs6000_mangle_type): Add mangling for __dmr type. (rs6000_dmr_register_move_cost): Add support for TDOmode. (rs6000_split_multireg_move): Likewise. (rs6000_invalid_conversion): Likewise. * config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode. (enum rs6000_builtin_type_index): Add DMR type nodes. (dmr_type_node): Likewise. (ptr_dmr_type_node): Likewise. gcc/testsuite/ * gcc.target/powerpc/dm-1024bit.c: New test. Diff: --- gcc/config/rs6000/mma.md | 154 ++ gcc/config/rs6000/rs6000-builtin.cc | 17 +++ gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-modes.def| 4 + gcc/config/rs6000/rs6000.cc | 101 - gcc/config/rs6000/rs6000.h| 6 +- gcc/testsuite/gcc.target/powerpc/dm-1024bit.c | 63 +++ 7 files changed, 321 insertions(+), 34 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index f3870eac51a..4f9c59046ea 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -91,6 +91,11 @@ UNSPEC_MMA_XVI8GER4SPP UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC + UNSPEC_DM_INSERT512_UPPER + UNSPEC_DM_INSERT512_LOWER + UNSPEC_DM_EXTRACT512 + UNSPEC_DMR_RELOAD_FROM_MEMORY + UNSPEC_DMR_RELOAD_TO_MEMORY ]) (define_c_enum "unspecv" @@ -770,3 +775,152 @@ } [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) + +;; TDOmode (__dmr keyword for 1,024 bit registers). 
+(define_expand "movtdo" + [(set (match_operand:TDO 0 "nonimmediate_operand") + (match_operand:TDO 1 "input_operand"))] + "TARGET_MMA_DENSE_MATH" +{ + rs6000_emit_move (operands[0], operands[1], TDOmode); + DONE; +}) + +(define_insn_and_split "*movtdo" + [(set (match_operand:TDO 0 "noni
[gcc(refs/users/meissner/heads/work163-dmf)] Add support for XVRL instruction.
https://gcc.gnu.org/g:7200cbd8395cb620028eeb6c6ef003f6064615e2 commit 7200cbd8395cb620028eeb6c6ef003f6064615e2 Author: Michael Meissner Date: Fri Mar 22 00:37:34 2024 -0400 Add support for XVRL instruction. 2024-03-22 Michael Meissner gcc/ * config/rs6000/altivec.md (xvrlw): New insn. Diff: --- gcc/config/rs6000/altivec.md | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 4d4c94ff0a0..bf01af15286 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1883,6 +1883,20 @@ } [(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl and will match for -mcpu=future, while other cpus will +;; match the generic insn. +(define_insn "*xvrlw" + [(set (match_operand:V4SI 0 "register_operand" "=v,wa") + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") +(match_operand:V4SI 2 "register_operand" "v,wa")))] + "TARGET_FUTURE" + "@ + vrlw %0,%1,%2 + xvrlw %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
[gcc(refs/users/meissner/heads/work163-dmf)] Update ChangeLog.*
https://gcc.gnu.org/g:7bedd90064489ea6db3a28999df34d7e1340dbb2 commit 7bedd90064489ea6db3a28999df34d7e1340dbb2 Author: Michael Meissner Date: Fri Mar 22 00:45:00 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.dmf | 45 + 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf index 5a28e3e994b..edc0448b14f 100644 --- a/gcc/ChangeLog.dmf +++ b/gcc/ChangeLog.dmf @@ -1,4 +1,14 @@ - Branch work163-dmf, patch #106 + Branch work163-dmf, patch #130 + +Add support for XVRL instruction. + +2024-03-22 Michael Meissner + +gcc/ + + * config/rs6000/altivec.md (xvrlw): New insn. + + Branch work163-dmf, patch #126 PowerPC: Add support for 1,024 bit DMR registers. @@ -20,7 +30,7 @@ don't have aliases for __dmr512 and __dmr1024 that we've discussed internally. The patches have been tested on both little and big endian systems. Can I check it into the master branch? -2024-03-19 Michael Meissner +2024-03-22 Michael Meissner gcc/ @@ -70,11 +80,11 @@ gcc/testsuite/ * gcc.target/powerpc/dm-1024bit.c: New test. - Branch work163-dmf, patch #105 + Branch work163-dmf, patch #125 Add dense math test for new instruction names. -2024-03-19 Michael Meissner +2024-03-22 Michael Meissner gcc/testsuite/ @@ -82,7 +92,7 @@ gcc/testsuite/ * lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New target test. - Branch work163-dmf, patch #104 + Branch work163-dmf, patch #124 PowerPC: Switch to dense math names for all MMA operations. @@ -97,7 +107,7 @@ the 'dm' prefix afterwards. To prevent having two sets of parallel int attributes, we remove the "pm" prefix from the instruction string in the attributes, and add it later, both in the insn name and in the output template. -2024-03-19 Michael Meissner +2024-03-22 Michael Meissner gcc/ @@ -133,7 +143,7 @@ gcc/ (mma_pm): Likewise. (mma_pm): Likewise. - Branch work163-dmf, patch #103 + Branch work163-dmf, patch #123 Add support for dense math registers. 
@@ -184,7 +194,7 @@ both MMA without dense math and MMA with dense math: It is possible that the mangling for DMRs and the GDB register numbers may produce other changes in the future. -2024-03-19 Michael Meissner +2024-03-22 Michael Meissner * config/rs6000/mma.md (movxo): Add comments about dense math registers. (movxo_nodm): Rename from movxo and restrict the usage to machines @@ -201,6 +211,8 @@ produce other changes in the future. (accumulator_operand): Add support for dense math registers. * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): Do not de-prime accumulator when disassembling a vector quad. + * config/rs6000/rs6000-c.cc (rs6000_define_or_undefine_macro): Define + __DENSE_MATH__ if we have dense math registers. * config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE. (enum rs6000_reload_reg_type): Add RELOAD_REG_DMR. (LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD @@ -239,8 +251,10 @@ produce other changes in the future. (enum r6000_reg_class_enum): Add RS6000_CONSTRAINT_wD. (REGISTER_NAMES): Add DMR registers. (ADDITIONAL_REGISTER_NAMES): Likewise. + * config/rs6000/rs6000.md (FIRST_DMR_REGNO): New constant. + (LAST_DMR_REGNO): Likewise. - Branch work163-dmf, patch #102 + Branch work163-dmf, patch #122 Add wD constraint. @@ -249,7 +263,7 @@ that overlap with VSX registers 0..31 on power10. Future patches will add the support for a separate accumulator register class that will be used when the support for dense math registes is added. -2024-03-19 Michael Meissner +2024-03-22 Michael Meissner * config/rs6000/constraints.md (wD): New constraint. * config/rs6000/mma.md (mma_disassemble_acc): Likewise. @@ -276,7 +290,7 @@ support for dense math registes is added. the 'wD' constraint. * doc/md.texi (PowerPC constraints): Document the 'wD' constraint. 
- Branch work163-dmf, patch #101 + Branch work163-dmf, patch #121 Use vector pair load/store for memcpy with -mcpu=future @@ -285,7 +299,7 @@ vector pair and store vector pair instructions when optimizing things like memory copy. This patch enables using those instructions if -mcpu=future is use
[gcc(refs/users/meissner/heads/work163-dmf)] Support load/store vector with right length.
https://gcc.gnu.org/g:57f8bb746fcadd38e99dd9eee715f9ff15ad2822 commit 57f8bb746fcadd38e99dd9eee715f9ff15ad2822 Author: Michael Meissner Date: Fri Mar 22 00:48:43 2024 -0400 Support load/store vector with right length. This patch adds support for new instructions that may be added to the PowerPC architecture in the future to enhance the load and store vector with length instructions. The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvenient to use since the count for the number of bytes must be in the top 8 bits of the GPR register, instead of the bottom 8 bits. This meant that code generating these instructions typically had to do a shift left by 56 bits to get the count into the right position. In a future version of the PowerPC architecture, new variants of these instructions might be added that expect the count to be in the bottom 8 bits of the GPR register. These patches add this support to GCC if the user uses the -mcpu=future option. I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl future lxvll/stxvll instructions would generate these instructions on 32-bit. However the patterns for these instructions are only defined on 64-bit systems. So I added a check for 64-bit support before generating the instructions. The patches have been tested on both little and big endian systems. Can I check it into the master branch? 2024-03-22 Michael Meissner gcc/ * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate lxvl and stxvl on 32-bit. * config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl with the shift count automatically used in the insn. (lxvrl): New insn for -mcpu=future. (lxvrll): Likewise. (stxvl): If -mcpu=future, generate the stxvl with the shift count automatically used in the insn. (stxvrl): New insn for -mcpu=future. (stxvrll): Likewise. gcc/testsuite/ * gcc.target/powerpc/lxvrl.c: New test. * lib/target-supports.exp (check_effective_target_powerpc_future_ok): New effective target. 
Diff: --- gcc/config/rs6000/rs6000-string.cc | 1 + gcc/config/rs6000/vsx.md | 122 +-- gcc/testsuite/gcc.target/powerpc/lxvrl.c | 32 gcc/testsuite/lib/target-supports.exp| 12 +++ 4 files changed, 146 insertions(+), 21 deletions(-) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index e74ccf41937..c6737e66cbe 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2787,6 +2787,7 @@ expand_block_move (rtx operands[], bool might_overlap) if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX && TARGET_BLOCK_OPS_VECTOR_PAIR + && TARGET_POWERPC64 && bytes >= 32 && (align >= 256 || !STRICT_ALIGNMENT)) { diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f135fa079bd..9520191e613 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5629,20 +5629,32 @@ DONE; }) -;; Load VSX Vector with Length +;; Load VSX Vector with Length. If we have lxvrl, we don't have to do an +;; explicit shift left into a pseudo. 
(define_expand "lxvl" - [(set (match_dup 3) -(ashift:DI (match_operand:DI 2 "register_operand") - (const_int 56))) - (set (match_operand:V16QI 0 "vsx_register_operand") - (unspec:V16QI -[(match_operand:DI 1 "gpc_reg_operand") - (mem:V16QI (match_dup 1)) - (match_dup 3)] -UNSPEC_LXVL))] + [(use (match_operand:V16QI 0 "vsx_register_operand")) + (use (match_operand:DI 1 "gpc_reg_operand")) + (use (match_operand:DI 2 "gpc_reg_operand"))] "TARGET_P9_VECTOR && TARGET_64BIT" { - operands[3] = gen_reg_rtx (DImode); + rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56)); + rtx len; + + if (TARGET_FUTURE) +len = shift_len; + else +{ + len = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (len, shift_len)); +} + + rtx dest = operands[0]; + rtx addr = operands[1]; + rtx mem = gen_rtx_MEM (V16QImode, addr); + rtvec rv = gen_rtvec (3, addr, mem, len); + rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL); + emit_insn (gen_rtx_SET (dest, lxvl)); + DONE; }) (define_insn "*lxvl" @@ -5666,6 +5678,34 @@ "lxvll %x0,%1,%2" [(set_attr "type" "vecload")]) +;; For lxvrl and lxvrll, use the combiner to eliminate the shift. The +;; define_expand for lxvl will already incorporate the shift in generating the +;; insn. The lxvll buitl-in function required the user to have already done +;; the shift. Defining lxvrll this way, will optimize cases where the user has +;; done the shift immediately before the built-i
[gcc(refs/users/meissner/heads/work163-dmf)] Add saturating subtract built-ins.
https://gcc.gnu.org/g:0376ff1f351466628aa8fd9f304c6e8c6e3cdb82 commit 0376ff1f351466628aa8fd9f304c6e8c6e3cdb82 Author: Michael Meissner Date: Fri Mar 22 00:51:29 2024 -0400 Add saturating subtract built-ins. This patch adds support for a saturating subtract built-in function that may be added to a future PowerPC processor. Note, if it is added, the name of the built-in function may change before GCC 13 is released. If the name changes, we will submit a patch changing the name. I also added support for providing dense math built-in functions, even though at present, we have not added any new built-in functions for dense math. It is likely we will want to add new dense math built-in functions as the dense math support is fleshed out. The patches have been tested on both little and big endian systems. Can I check it into the master branch? 2024-03-22 Michael Meissner gcc/ * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support for flagging invalid use of future built-in functions. (rs6000_builtin_is_supported): Add support for future built-in functions. * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New built-in function for -mcpu=future. (__builtin_saturate_subtract64): Likewise. * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas for -mcpu=future built-ins. (stanza_map): Likewise. (enable_string): Likewise. (struct attrinfo): Likewise. (parse_bif_attrs): Likewise. (write_decls): Likewise. * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract built-in insn declarations. (sat_sub3_dot): Likewise. (sat_sub3_dot2): Likewise. * doc/extend.texi (Future PowerPC built-ins): New section. gcc/testsuite/ * gcc.target/powerpc/subfus-1.c: New test. * gcc.target/powerpc/subfus-2.c: Likewise. 
Diff: --- gcc/config/rs6000/rs6000-builtin.cc | 17 gcc/config/rs6000/rs6000-builtins.def | 10 + gcc/config/rs6000/rs6000-gen-builtins.cc| 35 ++--- gcc/config/rs6000/rs6000.md | 60 + gcc/doc/extend.texi | 24 gcc/testsuite/gcc.target/powerpc/subfus-1.c | 32 +++ gcc/testsuite/gcc.target/powerpc/subfus-2.c | 32 +++ 7 files changed, 205 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 976a42a74cd..1af38698bf3 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -139,6 +139,17 @@ rs6000_invalid_builtin (enum rs6000_gen_builtins fncode) case ENB_MMA: error ("%qs requires the %qs option", name, "-mmma"); break; +case ENB_FUTURE: + error ("%qs requires the %qs option", name, "-mcpu=future"); + break; +case ENB_FUTURE_64: + error ("%qs requires the %qs option and either the %qs or %qs option", +name, "-mcpu=future", "-m64", "-mpowerpc64"); + break; +case ENB_DM: + error ("%qs requires the %qs or %qs options", name, "-mcpu=future", +"-mdense-math"); + break; default: case ENB_ALWAYS: gcc_unreachable (); @@ -194,6 +205,12 @@ rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode) return TARGET_HTM; case ENB_MMA: return TARGET_MMA; +case ENB_FUTURE: + return TARGET_FUTURE; +case ENB_FUTURE_64: + return TARGET_FUTURE && TARGET_POWERPC64; +case ENB_DM: + return TARGET_DENSE_MATH; default: gcc_unreachable (); } diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 3bc7fed6956..437ab0e09e9 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -139,6 +139,8 @@ ; endian Needs special handling for endianness ; ibmldRestrict usage to the case when TFmode is IBM-128 ; ibm128 Restrict usage to the case where __ibm128 is supported or if ibmld +; future Restrict usage to future instructions +; dm Restrict usage to dense math ; ; Each attribute corresponds to extra processing required 
when ; the built-in is expanded. All such special processing should @@ -4131,3 +4133,11 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} + +[future] + const signed int __builtin_saturate_subtract32 (signed int, signed int); + SAT_SUBSI sat_subsi3 {} + +[future-64] + const signed long __builtin_saturate_subtract64 (signed long, signed long); + SAT_SUBDI sat_subdi3 {} diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc b/gcc/config/rs6000/rs
[gcc(refs/users/meissner/heads/work163-dmf)] Add paddis support.
https://gcc.gnu.org/g:f453411169f9aaaf02b04c2c5cf843a608df8173 commit f453411169f9aaaf02b04c2c5cf843a608df8173 Author: Michael Meissner Date: Fri Mar 22 00:56:43 2024 -0400 Add paddis support. 2024-03-22 Michael Meissner gcc/ * config/rs6000/constraints.md (eU): New constraint. (eV): Likewise. * config/rs6000/predicates.md (paddis_operand): New predicate. (paddis_paddi_operand): Likewise. (add_operand): Add paddis support. * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS): Add -mpaddis support. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add -mpaddis support. (num_insns_constant_multi): Likewise. (print_operand): Add %B for paddis support. (rs6000_opt_masks): Add -mpaddis. * config/rs6000/rs6000.h (SIGNED_INTEGER_32BIT_P): New macro. * config/rs6000/rs6000.md (isa attribute): Add -mpaddis support. (enabled attribute): Likewise. (add3): Likewise. (adddi3 splitter): New splitter for paddis. (movdi_internal64): Add -mpaddis support. (movdi splitter): New splitter for -mpaddis. * config/rs6000/rs6000.opt (-mpaddis): New switch. Diff: --- gcc/config/rs6000/constraints.md | 10 + gcc/config/rs6000/predicates.md | 52 +- gcc/config/rs6000/rs6000.cc | 40 + gcc/config/rs6000/rs6000.h | 1 + gcc/config/rs6000/rs6000.md | 96 ++-- gcc/config/rs6000/rs6000.opt | 4 ++ 6 files changed, 189 insertions(+), 14 deletions(-) diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 277a30a8245..4d8d21fd6bb 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -222,6 +222,16 @@ "An IEEE 128-bit constant that can be loaded into VSX registers." 
(match_operand 0 "easy_vector_constant_ieee128")) +(define_constraint "eU" + "@internal integer constant that can be loaded with paddis" + (and (match_code "const_int") + (match_operand 0 "paddis_operand"))) + +(define_constraint "eV" + "@internal integer constant that can be loaded with paddis + paddi" + (and (match_code "const_int") + (match_operand 0 "paddis_paddi_operand"))) + ;; Floating-point constraints. These two are defined so that insn ;; length attributes can be calculated exactly. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index b325000690b..0b7c0bf4b0f 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -369,6 +369,53 @@ return SIGNED_INTEGER_34BIT_P (INTVAL (op)); }) +;; Return 1 if op is a 64-bit constant that uses the paddis instruction +(define_predicate "paddis_operand" + (match_code "const_int") +{ + if (!TARGET_PADDIS && TARGET_POWERPC64) +return 0; + + /* If addi, addis, or paddi can handle the number, don't return true. */ + HOST_WIDE_INT value = INTVAL (op); + if (SIGNED_INTEGER_34BIT_P (value)) +return false; + + /* If the number is too large for padds, return false. */ + if (!SIGNED_INTEGER_32BIT_P (value >> 32)) +return false; + + /* If the bottom 32-bits are non-zero, paddis can't handle it. */ + if ((value & HOST_WIDE_INT_C(0x)) != 0) +return false; + + return true; +}) + +;; Return 1 if op is a 64-bit constant that needs the paddis instruction and an +;; addi/addis/paddi instruction combination. +(define_predicate "paddis_paddi_operand" + (match_code "const_int") +{ + if (!TARGET_PADDIS && TARGET_POWERPC64) +return 0; + + /* If addi, addis, or paddi can handle the number, don't return true. */ + HOST_WIDE_INT value = INTVAL (op); + if (SIGNED_INTEGER_34BIT_P (value)) +return false; + + /* If the number is too large for padds, return false. 
*/ + if (!SIGNED_INTEGER_32BIT_P (value >> 32)) +return false; + + /* If the bottom 32-bits are zero, we can use paddis alone to handle it. */ + if ((value & HOST_WIDE_INT_C(0x)) == 0) +return false; + + return true; +}) + ;; Return 1 if op is a register that is not special. ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where ;; you need to be careful in moving a SFmode to SImode and vice versa due to @@ -1050,7 +1097,10 @@ (if_then_else (match_code "const_int") (match_test "satisfies_constraint_I (op) || satisfies_constraint_L (op) -|| satisfies_constraint_eI (op)") +|| satisfies_constraint_eI (op) +|| satisfies_constraint_eU (op) +|| satisfies_constraint_eV (op)") + (match_operand 0 "gpc_reg_operand"))) ;; Return 1 if the operand is either a non-special register, or 0, or -1. diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs600
[gcc(refs/users/meissner/heads/work163-dmf)] Update ChangeLog.*
https://gcc.gnu.org/g:ae0e0f7725093cfc154ea376e6da9ac652624d45 commit ae0e0f7725093cfc154ea376e6da9ac652624d45 Author: Michael Meissner Date: Fri Mar 22 00:58:36 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.dmf | 122 ++ 1 file changed, 122 insertions(+) diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf index edc0448b14f..1d1ae3c7d2d 100644 --- a/gcc/ChangeLog.dmf +++ b/gcc/ChangeLog.dmf @@ -1,3 +1,125 @@ + Branch work163-dmf, patch #133 + +Add paddis support. + +2024-03-22 Michael Meissner + +gcc/ + + * config/rs6000/constraints.md (eU): New constraint. + (eV): Likewise. + * config/rs6000/predicates.md (paddis_operand): New predicate. + (paddis_paddi_operand): Likewise. + (add_operand): Add paddis support. + * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS): Add -mpaddis + support. + (POWERPC_MASKS): Likewise. + * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add -mpaddis + support. + (num_insns_constant_multi): Likewise. + (print_operand): Add %B for paddis support. + (rs6000_opt_masks): Add -mpaddis. + & config/rs6000/rs6000.h (SIGNED_INTEGER_32BIT_P): New macro. + * config/rs6000/rs6000.md (isa attribute): Add -mpaddis support. + (enabled attribute); Likewise. + (add3): Likewise. + (adddi3 splitter): New splitter for paddis. + (movdi_internal64): Add -mpaddis support. + (movdi splitter): New splitter for -mpaddis. + * config/rs6000/rs6000.opt (-mpaddis): New switch. + + Branch work163-dmf, patch #132 + +Add saturating subtract built-ins. + +This patch adds support for a saturating subtract built-in function that may be +added to a future PowerPC processor. Note, if it is added, the name of the +built-in function may change before GCC 13 is released. If the name changes, +we will submit a patch changing the name. + +I also added support for providing dense math built-in functions, even though +at present, we have not added any new built-in functions for dense math. 
It is +likely we will want to add new dense math built-in functions as the dense math +support is fleshed out. + +The patches have been tested on both little and big endian systems. Can I check +it into the master branch? + +2024-03-22 Michael Meissner + +gcc/ + + * config/rs6000/rs6000-builtin.cc (rs6000_invalid_builtin): Add support + for flagging invalid use of future built-in functions. + (rs6000_builtin_is_supported): Add support for future built-in + functions. + * config/rs6000/rs6000-builtins.def (__builtin_saturate_subtract32): New + built-in function for -mcpu=future. + (__builtin_saturate_subtract64): Likewise. + * config/rs6000/rs6000-gen-builtins.cc (enum bif_stanza): Add stanzas + for -mcpu=future built-ins. + (stanza_map): Likewise. + (enable_string): Likewise. + (struct attrinfo): Likewise. + (parse_bif_attrs): Likewise. + (write_decls): Likewise. + * config/rs6000/rs6000.md (sat_sub3): Add saturating subtract + built-in insn declarations. + (sat_sub3_dot): Likewise. + (sat_sub3_dot2): Likewise. + * doc/extend.texi (Future PowerPC built-ins): New section. + +gcc/testsuite/ + + * gcc.target/powerpc/subfus-1.c: New test. + * gcc.target/powerpc/subfus-2.c: Likewise. + + Branch work163-dmf, patch #131 + +Support load/store vector with right length. + +This patch adds support for new instructions that may be added to the PowerPC +architecture in the future to enhance the load and store vector with length +instructions. + +The current instructions (lxvl, lxvll, stxvl, and stxvll) are inconvient to use +since the count for the number of bytes must be in the top 8 bits of the GPR +register, instead of the bottom 8 bits. This meant that code generating these +instructions typically had to do a shift left by 56 bits to get the count into +the right position. In a future version of the PowerPC architecture, new +variants of these instructions might be added that expect the count to be in +the bottom 8 bits of the GPR register. 
These patches add this support to GCC +if the user uses the -mcpu=future option. + +I discovered that the code in rs6000-string.cc to generate ISA 3.1 lxvl/stxvl +future lxvll/stxvll instructions would generate these instructions on 32-bit. +However the patterns for these instructions is only done on 64-bit systems. So +I added a check for 64-bit support before generating the instructions. + +The patches have been tested on both little and big endian systems. Can I check +it into the master branch? + +2024-03-22 Michael Meissner + +gcc/ + + * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate + lxvl and stxvl on 32-bit.
[gcc r14-9607] RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector
https://gcc.gnu.org/g:fd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24 commit r14-9607-gfd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24 Author: Christoph Müllner Date: Thu Mar 21 15:40:49 2024 +0100 RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector The expansion of `memset` (via expand_builtin_memset_args()) uses clear_by_pieces() and store_by_pieces() to avoid calls to the C runtime. To check if a type can be used for that purpose the function by_pieces_mode_supported_p() tests if a `mov` and a `vec_duplicate` INSN can be expanded by the backend. The `vec_duplicate` expansion takes arguments of type `V_VLS`. The `mov` expansions take arguments of type `V`, `VB`, `VT`, `VLS_AVL_IMM`, and `VLS_AVL_REG`. Some of these types (in fact not types but type iterators) include fractional LMUL types. E.g. `V_VLS` includes `V`, which includes `VI`, which includes `RVVMF2QI`. This results in an attempt to use fractional LMUL-types for the `memset` expansion resulting in an ICE for XTheadVector, because that extension cannot handle fractional LMULs. This patch addresses this issue by splitting the definition of the `VI` mode iterator into `VI_NOFRAC` (without fractional LMUL types) and `VI_FRAC` (only fractional LMUL types). Further, it defines `V_VLS` such that `VI_FRAC` types are only included if XTheadVector is not enabled. The effect is demonstrated by a new test case that shows that the by-pieces framework now emits `sb` instructions instead of triggering an ICE. Signed-off-by: Christoph Müllner PR target/114194 gcc/ChangeLog: * config/riscv/vector-iterators.md: Split VI into VI_FRAC and VI_NOFRAC. Only include VI_NOFRAC in V_VLS without TARGET_XTHEADVECTOR. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/pr114194.c: New test. 
Signed-off-by: Christoph Müllner Diff: --- gcc/config/riscv/vector-iterators.md | 19 +--- .../gcc.target/riscv/rvv/xtheadvector/pr114194.c | 56 ++ 2 files changed, 69 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index c2ea7e8b10a..a24e1bf078f 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -108,17 +108,24 @@ UNSPECV_FRM_RESTORE_EXIT ]) -(define_mode_iterator VI [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") - - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") - - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") +;; Subset of VI with fractional LMUL types +(define_mode_iterator VI_FRAC [ + RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + (RVVMF2SI "TARGET_MIN_VLEN > 32") +]) +;; Subset of VI with non-fractional LMUL types +(define_mode_iterator VI_NOFRAC [ + RVVM8QI RVVM4QI RVVM2QI RVVM1QI + RVVM8HI RVVM4HI RVVM2HI RVVM1HI + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") ]) +(define_mode_iterator VI [ VI_NOFRAC (VI_FRAC "!TARGET_XTHEADVECTOR") ]) + ;; This iterator is the same as above but with TARGET_VECTOR_ELEN_FP_16 ;; changed to TARGET_ZVFH. 
TARGET_VECTOR_ELEN_FP_16 is also true for ;; TARGET_ZVFHMIN while we actually want to disable all instructions apart diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c new file mode 100644 index 000..fc2d1349425 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_xtheadvector" { target { rv32 } } } */ +/* { dg-options "-march=rv64gc_xtheadvector" { target { rv64 } } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** foo0_1: +** sb\tzero,0([a-x0-9]+) +** ret +*/ +void foo0_1 (void *p) +{ + __builtin_memset (p, 0, 1); +} + +/* +** foo0_7: +** sb\tzero,0([a-x0-9]+) +** sb\tzero,1([a-x0-9]+) +** sb\tzero,2([a-x0-9]+) +** sb\tzero,3([a-x0-9]+) +** sb\tzero,4([a-x0-9]+) +** sb\tzero,5([a-x0-9]+) +** sb\tzero,6([a-x0-9]+) +** ret +*/ +void foo0_7 (void *p) +{ + __builtin_memset (p, 0, 7); +} + +/* +** foo1_1: +** li\t[a-x0-9]+,1 +** sb\t[a-x0-9]+,0([a-x0-9]+) +** ret +*/ +void foo1_1 (void *p) +{ + __builtin_memset (p, 1, 1); +} + +/* +** foo1_5: +** li\t[a-x0-9]+,1 +** sb\t[a-x0-9]+,0([a-x0-9]+) +** sb\t[a-x0-9]+,1([a-x0-9]+) +** sb\t[a-x0-9]+,2([a-x0-9]+) +** sb\t[a-x0-9]+,3([a-x0-9]+) +** sb\t[a-x0-9]+,4([a-x0-9]+) +** ret +*/ +void foo1_5 (void *p) +{ + __builtin_memset (p, 1, 5); +}