[gcc r16-168] RISC-V: Extract vector stepped for expand_const_vector [NFC]
https://gcc.gnu.org/g:ab22b8c630769330b4f37eb64d2bc285344a647a commit r16-168-gab22b8c630769330b4f37eb64d2bc285344a647a Author: Pan Li Date: Thu Apr 17 10:27:17 2025 +0800 RISC-V: Extract vector stepped for expand_const_vector [NFC] Since expand_const_vector is quite long (about 500 lines) and complicated, we would like to extract the different cases into separate functions. For example, the const vector stepped will be extracted into expand_const_vector_stepped. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Extract const vector stepped into separated func. (expand_const_vector_single_step_npatterns): Add new func to take care of single step. (expand_const_vector_interleaved_stepped_npatterns): Add new func to take care of interleaved step. (expand_const_vector_stepped): Add new func to take care of const vector stepped. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 590 ++-- 1 file changed, 299 insertions(+), 291 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 96f0b94e6140..66c8b2921e26 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1340,334 +1340,342 @@ expand_const_vector_duplicate (rtx target, rvv_builder *builder) } static void -expand_const_vector (rtx target, rtx src) +expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder) { machine_mode mode = GET_MODE (target); rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); - rtx elt; - if (const_vec_duplicate_p (src, &elt)) -return expand_const_vec_duplicate (target, src, elt); + /* Describe the case by choosing NPATTERNS = 4 as an example. */ + insn_code icode; - /* Support scalable const series vector. 
*/ - rtx base, step; - if (const_vec_series_p (src, &base, &step)) -return expand_const_vec_series (target, base, step); + /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->mode ()); + rtx vid_ops[] = {vid}; + icode = code_for_pred_series (builder->mode ()); + emit_vlmax_insn (icode, NULLARY_OP, vid_ops); - /* Handle variable-length vector. */ - unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); - unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); - rvv_builder builder (mode, npatterns, nelts_per_pattern); - for (unsigned int i = 0; i < nelts_per_pattern; i++) + if (builder->npatterns_all_equal_p ()) { - for (unsigned int j = 0; j < npatterns; j++) - builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j)); -} - builder.finalize (); + /* Generate the variable-length vector following this rule: +{ a, a, a + step, a + step, a + step * 2, a + step * 2, ...} + E.g. { 0, 0, 8, 8, 16, 16, ... } */ + + /* We want to create a pattern where value[idx] = floor (idx / +NPATTERNS). As NPATTERNS is always a power of two we can +rewrite this as = idx & -NPATTERNS. */ + /* Step 2: VID AND -NPATTERNS: +{ 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */ + rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ()); + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx and_ops[] = {tmp1, vid, imm}; + icode = code_for_pred_scalar (AND, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, and_ops); + + /* Step 3: Convert to step size 1. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + /* log2 (npatterns) to get the shift amount to convert +Eg. { 0, 0, 0, 0, 4, 4, ... } +into { 0, 0, 0, 0, 1, 1, ... }. 
*/ + HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ()); + rtx shift = gen_int_mode (shift_amt, builder->inner_mode ()); + rtx shift_ops[] = {tmp2, tmp1, shift}; + icode = code_for_pred_scalar (ASHIFTRT, builder->mode ()); + emit_vlmax_insn (icode, BINARY_OP, shift_ops); + + /* Step 4: Multiply to step size n. */ + HOST_WIDE_INT step_size = + INTVAL (builder->elt (builder->npatterns ())) + - INTVAL (builder->elt (0)); + rtx tmp3 = gen_reg_rtx (builder->mode ()); + if (pow2p_hwi (step_size)) + { + /* Power of 2 can be handled with a left shift. */ + HOST_WIDE_INT shift = exact_log2 (step_size); + rtx shift_amount = gen_int_mode (shift, Pmode); + insn_code icode = code_for_pred_scalar (ASHIFT, mode); + rtx ops[] = {tmp3, tmp2, shift_amount}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } + else + { + rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ()); + insn_code icode = code_for_pred_scal
[gcc r16-167] RISC-V: Extract vector duplicate for expand_const_vector [NFC]
https://gcc.gnu.org/g:cf0283a8ed035e382a3870a8dce554acf7dfc82e commit r16-167-gcf0283a8ed035e382a3870a8dce554acf7dfc82e Author: Pan Li Date: Wed Apr 16 15:47:21 2025 +0800 RISC-V: Extract vector duplicate for expand_const_vector [NFC] Since expand_const_vector is quite long (about 500 lines) and complicated, we would like to extract the different cases into separate functions. For example, the const vector duplicate will be extracted into expand_const_vector_duplicate, and then expand_const_vector_duplicate_repeating and expand_const_vector_duplicate_default for the underlying function. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector_duplicate_repeating): Add new func to take care of vector duplicate with repeating. (expand_const_vector_duplicate_default): Add new func to take care of default const vector duplicate. (expand_const_vector_duplicate): Add new func to take care of all const vector duplicate. (expand_const_vector): Extract const vector duplicate into separated function. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 180 +--- 1 file changed, 104 insertions(+), 76 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index e0af296449c3..96f0b94e6140 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1236,6 +1236,109 @@ expand_const_vec_series (rtx target, rtx base, rtx step) emit_move_insn (target, result); } + +/* We handle the case that we can find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 8 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + In this case, we can combine NPATTERNS element into a larger + element. Use element width = 64 and broadcast a vector with + all element equal to 0x0706050403020100. 
*/ + +static void +expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + rtx ele = builder->get_merged_repeating_sequence (); + rtx dup; + + if (lra_in_progress) +{ + dup = gen_reg_rtx (builder->new_mode ()); + rtx ops[] = {dup, ele}; + emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()), + UNARY_OP, ops); +} + else +dup = expand_vector_broadcast (builder->new_mode (), ele); + + emit_move_insn (result, gen_lowpart (mode, dup)); + + if (result != target) +emit_move_insn (target, result); +} + +/* We handle the case that we can't find a vector container to hold + element bitsize = NPATTERNS * ele_bitsize. + + NPATTERNS = 8, element width = 16 + v = { 0, 1, 2, 3, 4, 5, 6, 7, ... } + Since NPATTERNS * element width = 128, we can't find a container + to hold it. + + In this case, we use NPATTERNS merge operations to generate such + vector. */ + +static void +expand_const_vector_duplicate_default (rtx target, rvv_builder *builder) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + unsigned int nbits = builder->npatterns () - 1; + + /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */ + rtx vid = gen_reg_rtx (builder->int_mode ()); + rtx op[] = {vid}; + emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op); + + /* Generate vid_repeat = { 0, 1, ... nbits, ... 
} */ + rtx vid_repeat = gen_reg_rtx (builder->int_mode ()); + rtx and_ops[] = {vid_repeat, vid, + gen_int_mode (nbits, builder->inner_int_mode ())}; + emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP, + and_ops); + + rtx tmp1 = gen_reg_rtx (builder->mode ()); + rtx dup_ops[] = {tmp1, builder->elt (0)}; + emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP, + dup_ops); + + for (unsigned int i = 1; i < builder->npatterns (); i++) +{ + /* Generate mask according to i. */ + rtx mask = gen_reg_rtx (builder->mask_mode ()); + rtx const_vec = gen_const_vector_dup (builder->int_mode (), i); + expand_vec_cmp (mask, EQ, vid_repeat, const_vec); + + /* Merge scalar to each i. */ + rtx tmp2 = gen_reg_rtx (builder->mode ()); + rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask}; + insn_code icode = code_for_pred_merge_scalar (builder->mode ()); + emit_vlmax_insn (icode, MERGE_OP, merge_ops); + tmp1 = tmp2; +} + + emit_move_insn (result, tmp1); + + if (result != target) +emit_move_insn (target, result); +} + +/* Handle the case with repeating sequence that NELTS_PER_PATTE
[gcc r16-171] i386: Adjust apx-ndd.c for frontend promotion removal
https://gcc.gnu.org/g:cdb239bd213524a43b38ad8fca8e7ed0b5fb41eb commit r16-171-gcdb239bd213524a43b38ad8fca8e7ed0b5fb41eb Author: H.J. Lu Date: Sun Nov 10 11:27:14 2024 +0800 i386: Adjust apx-ndd.c for frontend promotion removal Since the C frontend no longer promotes integer arguments smaller than int, the apx-ndd.c codegen is slightly different: apx-ndd.s (original)2024-11-10 06:07:09.894876973 +0800 apx-ndd.s (updated) 2024-11-10 06:06:59.371860565 +0800 @@ -17,7 +17,7 @@ foo_add_char: foo1_add_char: .LFB1: .cfi_startproc - leal(%rsi,%rdi), %eax + leal(%rdi,%rsi), %eax ret .cfi_endproc .LFE1: @@ -50,7 +50,7 @@ foo_add_short: foo1_add_short: .LFB4: .cfi_startproc - leal(%rsi,%rdi), %eax + leal(%rdi,%rsi), %eax ret .cfi_endproc .LFE4: @@ -413,7 +413,7 @@ foo_and_char: foo1_and_char: .LFB37: .cfi_startproc - andl%edi, %esi, %eax + andl%esi, %edi, %eax ret .cfi_endproc .LFE37: @@ -435,7 +435,7 @@ foo_and_short: foo1_and_short: .LFB39: .cfi_startproc - andl%edi, %esi, %eax + andl%esi, %edi, %eax ret .cfi_endproc .LFE39: @@ -501,7 +501,7 @@ foo_or_char: foo1_or_char: .LFB45: .cfi_startproc - orl %edi, %esi, %eax + orl %esi, %edi, %eax ret .cfi_endproc .LFE45: @@ -523,7 +523,7 @@ foo_or_short: foo1_or_short: .LFB47: .cfi_startproc - orl %edi, %esi, %eax + orl %esi, %edi, %eax ret .cfi_endproc .LFE47: @@ -589,7 +589,7 @@ foo_xor_char: foo1_xor_char: .LFB53: .cfi_startproc - xorl%edi, %esi, %eax + xorl%esi, %edi, %eax ret .cfi_endproc .LFE53: @@ -611,7 +611,7 @@ foo_xor_short: foo1_xor_short: .LFB55: .cfi_startproc - xorl%edi, %esi, %eax + xorl%esi, %edi, %eax ret .cfi_endproc .LFE55: @@ -1018,7 +1018,7 @@ foo4_rol_uint64_t: foo1_imul_short: .LFB92: .cfi_startproc - imull %edi, %esi, %eax + imull %esi, %edi, %eax ret .cfi_endproc .LFE92: Adjust the assembler scans. PR middle-end/112877 * gcc.target/i386/apx-ndd.c: Adjusted. Signed-off-by: H.J. 
Lu Diff: --- gcc/testsuite/gcc.target/i386/apx-ndd.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index ce77630a47c8..2b2f4fc4b0fe 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -188,16 +188,13 @@ FOO2 (int64_t, imul, *) /* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */ /* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */ -/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ -/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 2} } */ /* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 6 } } */ -/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */ -/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 8 } } */ /* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%(?:r|e)di\\), %al" 1 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)ax" 3 } } */ -/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ -/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ 
/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n
[gcc r16-170] Drop targetm.promote_prototypes from C, C++ and Ada frontends
https://gcc.gnu.org/g:a670ebde3995481225ec62b29686ec07a21e5c10 commit r16-170-ga670ebde3995481225ec62b29686ec07a21e5c10 Author: H.J. Lu Date: Thu Nov 21 07:54:35 2024 +0800 Drop targetm.promote_prototypes from C, C++ and Ada frontends Remove the targetm.calls.promote_prototypes call from C, C++ and Ada frontends. gcc/ PR c/48274 PR middle-end/112877 PR middle-end/118288 * gimple.cc (gimple_builtin_call_types_compatible_p): Remove the targetm.calls.promote_prototypes call. * tree.cc (tree_builtin_call_types_compatible_p): Likewise. gcc/ada/ PR middle-end/112877 * gcc-interface/utils.cc (create_param_decl): Remove the targetm.calls.promote_prototypes call. gcc/c/ PR c/48274 PR middle-end/112877 PR middle-end/118288 * c-decl.cc (start_decl): Remove the targetm.calls.promote_prototypes call. (store_parm_decls_oldstyle): Likewise. (finish_function): Likewise. * c-typeck.cc (convert_argument): Likewise. (c_safe_arg_type_equiv_p): Likewise. gcc/cp/ PR middle-end/112877 * call.cc (type_passed_as): Remove the targetm.calls.promote_prototypes call. (convert_for_arg_passing): Likewise. * typeck.cc (cxx_safe_arg_type_equiv_p): Likewise. Signed-off-by: H.J. Lu Diff: --- gcc/ada/gcc-interface/utils.cc | 24 gcc/c/c-decl.cc| 40 gcc/c/c-typeck.cc | 19 --- gcc/cp/call.cc | 10 -- gcc/cp/typeck.cc | 13 - gcc/gimple.cc | 10 +- gcc/tree.cc| 14 -- 7 files changed, 9 insertions(+), 121 deletions(-) diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index 9212827aecfe..23737c3296cc 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -3286,30 +3286,6 @@ tree create_param_decl (tree name, tree type) { tree param_decl = build_decl (input_location, PARM_DECL, name, type); - - /* Honor TARGET_PROMOTE_PROTOTYPES like the C compiler, as not doing so - can lead to various ABI violations. 
*/ - if (targetm.calls.promote_prototypes (NULL_TREE) - && INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) -{ - /* We have to be careful about biased types here. Make a subtype -of integer_type_node with the proper biasing. */ - if (TREE_CODE (type) == INTEGER_TYPE - && TYPE_BIASED_REPRESENTATION_P (type)) - { - tree subtype - = make_unsigned_type (TYPE_PRECISION (integer_type_node)); - TREE_TYPE (subtype) = integer_type_node; - TYPE_BIASED_REPRESENTATION_P (subtype) = 1; - SET_TYPE_RM_MIN_VALUE (subtype, TYPE_MIN_VALUE (type)); - SET_TYPE_RM_MAX_VALUE (subtype, TYPE_MAX_VALUE (type)); - type = subtype; - } - else - type = integer_type_node; -} - DECL_ARG_TYPE (param_decl) = type; return param_decl; } diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 8c420f229762..e7aee8a0f4b2 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -5720,26 +5720,6 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, DECL_EXTERNAL (decl) = !DECL_EXTERNAL (decl); } - if (TREE_CODE (decl) == FUNCTION_DECL - && targetm.calls.promote_prototypes (TREE_TYPE (decl))) -{ - struct c_declarator *ce = declarator; - - if (ce->kind == cdk_pointer) - ce = declarator->declarator; - if (ce->kind == cdk_function) - { - tree args = ce->u.arg_info->parms; - for (; args; args = DECL_CHAIN (args)) - { - tree type = TREE_TYPE (args); - if (type && INTEGRAL_TYPE_P (type) - && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) - DECL_ARG_TYPE (args) = c_type_promotes_to (type); - } - } -} - if (TREE_CODE (decl) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (decl) && DECL_UNINLINABLE (decl) @@ -11179,13 +11159,6 @@ store_parm_decls_oldstyle (tree fndecl, const struct c_arg_info *arg_info) useful for argument types like uid_t. 
*/ DECL_ARG_TYPE (parm) = TREE_TYPE (parm); - if (targetm.calls.promote_prototypes (TREE_TYPE (current_function_decl)) - && INTEGRAL_TYPE_P (TREE_TYPE (parm)) - && (TYPE_PRECISION (TREE_TYPE (parm)) - < TYPE_PRECISION (integer_type_node))) - DECL_ARG_TYPE (parm) - = c_type_promotes_to (TREE_TYPE (
[gcc r16-172] vect-simd-clone-1[6-8][cd].c: Expect in-branch clones for x86
https://gcc.gnu.org/g:f9f81d5017adc5d860b24f67aeb89b4e79c7ebdb commit r16-172-gf9f81d5017adc5d860b24f67aeb89b4e79c7ebdb Author: H.J. Lu Date: Sun Nov 10 16:41:10 2024 +0800 vect-simd-clone-1[6-8][cd].c: Expect in-branch clones for x86 Since the C frontend no longer promotes char and short arguments, expect in-branch clones for x86. PR middle-end/112877 * gcc.dg/vect/vect-simd-clone-16c.c: Expect in-branch clones for x86. * gcc.dg/vect/vect-simd-clone-16d.c: Likewise. * gcc.dg/vect/vect-simd-clone-17c.c: Likewise. * gcc.dg/vect/vect-simd-clone-17d.c: Likewise. * gcc.dg/vect/vect-simd-clone-18c.c: Likewise. * gcc.dg/vect/vect-simd-clone-18d.c: Likewise. Signed-off-by: H.J. Lu Diff: --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c | 4 +--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c | 5 + 6 files changed, 6 insertions(+), 23 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c index 4fdf25d06c69..628d45756739 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=short. 
*/ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c index 55d3c0afae5e..d1f85b0703e9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c @@ -7,11 +7,9 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=char. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c index 6afa2fd595e9..6148abee8067 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! 
{ x86_64-*-* || { i?86-*-* || aarch64*-*-* } } } } } } */ +/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */ -/* x86_64 fails to use in-branch clones for TYPE=short. */ -/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64-*-* i?86-*-* } } } */ - /* The LTO test produces two dump files and we scan the wrong one. */ /* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c index 56177880b6bf..63687984598f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c @@ -7,11 +7,8 @@ /* Ensure the the in-branch simd clones are used on targets that support them. Some targets use another call for the epilogue loops. *
[gcc r16-165] RISC-V: Extract vec_duplicate for expand_const_vector [NFC]
https://gcc.gnu.org/g:e6e42a709f3cd87e7a5efca72267cab57e0385cb commit r16-165-ge6e42a709f3cd87e7a5efca72267cab57e0385cb Author: Pan Li Date: Wed Apr 16 11:16:21 2025 +0800 RISC-V: Extract vec_duplicate for expand_const_vector [NFC] Since expand_const_vector is quite long (about 500 lines) and complicated, we would like to extract the different cases into separate functions. For example, the const vec_duplicate will be extracted into expand_const_vec_duplicate. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Extract const vec_duplicate into separated function. (expand_const_vec_duplicate): Add new func to take care of the const vec_duplicate. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 92 - 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index aae2d274336e..1eb14a24e3db 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1171,61 +1171,69 @@ expand_vector_init_trailing_same_elem (rtx target, } static void -expand_const_vector (rtx target, rtx src) +expand_const_vec_duplicate (rtx target, rtx src, rtx elt) { machine_mode mode = GET_MODE (target); rtx result = register_operand (target, mode) ? 
target : gen_reg_rtx (mode); - rtx elt; - if (const_vec_duplicate_p (src, &elt)) + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) +{ + gcc_assert (rtx_equal_p (elt, const0_rtx) + || rtx_equal_p (elt, const1_rtx)); + + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); +} + else if (valid_vec_immediate_p (src)) { - if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) - { - gcc_assert (rtx_equal_p (elt, const0_rtx) - || rtx_equal_p (elt, const1_rtx)); - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops); - } /* Element in range -16 ~ 15 integer or 0.0 floating-point, we use vmv.v.i instruction. */ - else if (valid_vec_immediate_p (src)) + rtx ops[] = {result, src}; + emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); +} + else +{ + /* Emit vec_duplicate split pattern before RA so that +we could have a better optimization opportunity in LICM +which will hoist vmv.v.x outside the loop and in fwprop && combine +which will transform 'vv' into 'vx' instruction. + +The reason we don't emit vec_duplicate split pattern during +RA since the split stage after RA is a too late stage to generate +RVV instruction which need an additional register (We can't +allocate a new register after RA) for VL operand of vsetvl +instruction (vsetvl a5, zero). */ + if (lra_in_progress) { - rtx ops[] = {result, src}; - emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); + rtx ops[] = {result, elt}; + emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); } else { - /* Emit vec_duplicate split pattern before RA so that -we could have a better optimization opportunity in LICM -which will hoist vmv.v.x outside the loop and in fwprop && combine -which will transform 'vv' into 'vx' instruction. 
- -The reason we don't emit vec_duplicate split pattern during -RA since the split stage after RA is a too late stage to generate -RVV instruction which need an additional register (We can't -allocate a new register after RA) for VL operand of vsetvl -instruction (vsetvl a5, zero). */ - if (lra_in_progress) - { - rtx ops[] = {result, elt}; - emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops); - } - else - { - struct expand_operand ops[2]; - enum insn_code icode = optab_handler (vec_duplicate_optab, mode); - gcc_assert (icode != CODE_FOR_nothing); - create_output_operand (&ops[0], result, mode); - create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); - expand_insn (icode, 2, ops); - result = ops[0].value; - } + struct expand_operand ops[2]; + enum insn_code icode = optab_handler (vec_duplicate_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + create_output_operand (&ops[0], result, mode); + create_input_operand (&ops[1], elt, GET_MODE_INNER (mode)); + expand_insn (icode, 2, ops); + result = ops[0].value; } - - if (result != target
[gcc r16-166] RISC-V: Extract vec_series for expand_const_vector [NFC]
https://gcc.gnu.org/g:cf366b62f48fc5c06b76a9a78320888a9591031b commit r16-166-gcf366b62f48fc5c06b76a9a78320888a9591031b Author: Pan Li Date: Wed Apr 16 14:43:23 2025 +0800 RISC-V: Extract vec_series for expand_const_vector [NFC] Since expand_const_vector is quite long (about 500 lines) and complicated, we would like to extract the different cases into separate functions. For example, the const vec_series will be extracted into expand_const_vec_series. The test suites below pass with this patch. * The rv64gcv full regression test. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vec_series): Add new func to take care of the const vec_series. (expand_const_vector): Extract const vec_series into separated function. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 20 +--- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 1eb14a24e3db..e0af296449c3 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1224,6 +1224,18 @@ expand_const_vec_duplicate (rtx target, rtx src, rtx elt) emit_move_insn (target, result); } +static void +expand_const_vec_series (rtx target, rtx base, rtx step) +{ + machine_mode mode = GET_MODE (target); + rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); + + expand_vec_series (result, base, step); + + if (result != target) +emit_move_insn (target, result); +} + static void expand_const_vector (rtx target, rtx src) { @@ -1237,13 +1249,7 @@ expand_const_vector (rtx target, rtx src) /* Support scalable const series vector. */ rtx base, step; if (const_vec_series_p (src, &base, &step)) -{ - expand_vec_series (result, base, step); - - if (result != target) - emit_move_insn (target, result); - return; -} +return expand_const_vec_series (target, base, step); /* Handle variable-length vector. */ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
[gcc r16-161] Add m32c*-*-* to the list of obsolete targets
https://gcc.gnu.org/g:0035613389a939043d654aea7a76faae95f69422 commit r16-161-g0035613389a939043d654aea7a76faae95f69422 Author: Iain Buclaw Date: Fri Apr 25 19:45:07 2025 +0200 Add m32c*-*-* to the list of obsolete targets This patch marks m32c*-*-* targets obsolete in GCC 16. The target has not had a maintainer since GCC 9, and fails to compile even the simplest of functions since GCC 8 (reported in PR83670). contrib/ChangeLog: * config-list.mk: Add m32c*-*-* to the list of obsoleted targets. gcc/ChangeLog: * config.gcc (LIST): --enable-obsolete for m32c-elf. Diff: --- contrib/config-list.mk | 2 +- gcc/config.gcc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/config-list.mk b/contrib/config-list.mk index fc9fc9902bff..58bb4c5c186b 100644 --- a/contrib/config-list.mk +++ b/contrib/config-list.mk @@ -65,7 +65,7 @@ LIST = \ ia64-hp-vmsOPT-enable-obsolete iq2000-elf lm32-elf \ lm32-rtems lm32-uclinux \ loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \ - m32c-elf m32r-elf m32rle-elf \ + m32c-elfOPT-enable-obsolete m32r-elf m32rle-elf \ m68k-elf m68k-netbsdelf \ m68k-uclinux m68k-linux m68k-rtems \ mcore-elf microblaze-linux microblaze-elf \ diff --git a/gcc/config.gcc b/gcc/config.gcc index d98df883fce5..6dbe880c9d45 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -273,6 +273,7 @@ esac # Obsolete configurations. case ${target} in ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf* \ + | m32c*-*-* \ ) if test "x$enable_obsolete" != xyes; then echo "*** Configuration ${target} is obsolete." >&2
[gcc r16-169] Honor TARGET_PROMOTE_PROTOTYPES during RTL expand
https://gcc.gnu.org/g:78db4753c9646a372512e6a951fced12f74de0bc commit r16-169-g78db4753c9646a372512e6a951fced12f74de0bc Author: H.J. Lu Date: Thu Nov 21 08:11:06 2024 +0800 Honor TARGET_PROMOTE_PROTOTYPES during RTL expand Promote integer arguments smaller than int if TARGET_PROMOTE_PROTOTYPES returns true. gcc/ PR middle-end/112877 * calls.cc (initialize_argument_information): Promote small integer arguments if TARGET_PROMOTE_PROTOTYPES returns true. gcc/testsuite/ PR middle-end/112877 * gfortran.dg/pr112877-1.f90: New test. Signed-off-by: H.J. Lu Diff: --- gcc/calls.cc | 9 + gcc/testsuite/gfortran.dg/pr112877-1.f90 | 17 + 2 files changed, 26 insertions(+) diff --git a/gcc/calls.cc b/gcc/calls.cc index 076e046a8ef1..676f0f9229e0 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -1382,6 +1382,11 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, } } + bool promote_p += targetm.calls.promote_prototypes (fndecl + ? TREE_TYPE (fndecl) + : fntype); + /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ for (argpos = 0; argpos < num_actuals; i--, argpos++) { @@ -1391,6 +1396,10 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, /* Replace erroneous argument with constant zero. */ if (type == error_mark_node || !COMPLETE_TYPE_P (type)) args[i].tree_value = integer_zero_node, type = integer_type_node; + else if (promote_p + && INTEGRAL_TYPE_P (type) + && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) + type = integer_type_node; /* If TYPE is a transparent union or record, pass things the way we would pass the first field of the union or record. We have diff --git a/gcc/testsuite/gfortran.dg/pr112877-1.f90 b/gcc/testsuite/gfortran.dg/pr112877-1.f90 new file mode 100644 index ..f5596f0d0adc --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr112877-1.f90 @@ -0,0 +1,17 @@ +! { dg-do compile } +! 
{ dg-options "-Os" } + +program test +use iso_c_binding, only: c_short +interface + subroutine foo(a) bind(c) +import c_short +integer(kind=c_short), intent(in), value :: a + end subroutine foo +end interface +integer(kind=c_short) a(5); +call foo (a(3)) +end + +! { dg-final { scan-assembler "movswl\t10\\(%rsp\\), %edi" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ! ia32 } } } } } +! { dg-final { scan-assembler "movswl\t-14\\(%ebp\\), %eax" { target { { i?86-*-linux* i?86-*-gnu* x86_64-*-linux* x86_64-*-gnu* } && { ia32 } } } } }
[gcc r16-173] scev-cast.c: Enable for all targets and adjust scan matches
https://gcc.gnu.org/g:de8648def762e3b54200dd3cd5c6fb480b228579 commit r16-173-gde8648def762e3b54200dd3cd5c6fb480b228579 Author: H.J. Lu Date: Sun Nov 10 16:50:46 2024 +0800 scev-cast.c: Enable for all targets and adjust scan matches Since the C frontend no longer promotes char arguments, enable scev-cast.c for all targets and adjust scan matches. PR middle-end/112877 * gcc.dg/tree-ssa/scev-cast.c: Enable for all targets and adjust scan match. Signed-off-by: H.J. Lu Diff: --- gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c index c569523ffa71..469e49363296 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c @@ -1,6 +1,5 @@ /* A test for various conversions of chrecs. */ -/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ void blas (signed char xxx); @@ -22,6 +21,6 @@ void tst(void) blau ((unsigned char) i); } -/* { dg-final { scan-tree-dump-times "& 255" 1 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \\(unsigned char\\)" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 3 "optimized" } } */
[gcc r16-174] ssa-fre-4.c: Enable for all targets and adjust scan match
https://gcc.gnu.org/g:f962f594e9006651379dafc9ef039be9654e6291 commit r16-174-gf962f594e9006651379dafc9ef039be9654e6291 Author: H.J. Lu Date: Sun Nov 10 17:55:20 2024 +0800 ssa-fre-4.c: Enable for all targets and adjust scan match Since the C frontend no longer promotes char arguments, enable ssa-fre-4.c for all targets and adjust scan match. PR middle-end/112877 * gcc.dg/tree-ssa/ssa-fre-4.c: Enable for all targets and adjust scan match. Signed-off-by: H.J. Lu Diff: --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c index 5a7588febaa3..246fea3a4b95 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c @@ -1,6 +1,4 @@ -/* If the target returns false for TARGET_PROMOTE_PROTOTYPES, then there - will be no casts for FRE to eliminate and the test will fail. */ -/* { dg-do compile { target i?86-*-* x86_64-*-* hppa*-*-* m68k*-*-* } } */ +/* { dg-do compile } */ /* { dg-options "-O -fno-tree-ccp -fno-tree-forwprop -fdump-tree-fre1-details" } */ /* From PR21608. */ @@ -11,4 +9,4 @@ char bar(char f) return wrap(f); } -/* { dg-final { scan-tree-dump "Replaced \\\(char\\\) .*with " "fre1" } } */ +/* { dg-final { scan-tree-dump-not " = \\\(\[^)\]*\\\)" "fre1" } } */
[gcc r16-162] Fix i386 vectorizer cost of FP scalar MAX_EXPR and MIN_EXPR
https://gcc.gnu.org/g:1d635e79b3c2d26f864964b79717132bffbcad20 commit r16-162-g1d635e79b3c2d26f864964b79717132bffbcad20 Author: Jan Hubicka Date: Sat Apr 26 22:10:19 2025 +0200 Fix i386 vectorizer cost of FP scalar MAX_EXPR and MIN_EXPR I introduced a bug with last-minute cleanups unifying the scalar and vector SSE conditional. This patch fixes it and restores the cost of 1 for SSE scalar MIN/MAX. Bootstrapped/regtested x86_64-linux, committed. gcc/ChangeLog: PR target/105275 * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Fix cost of FP scalar MAX_EXPR and MIN_EXPR Diff: --- gcc/config/i386/i386.cc | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 78df3d9525ae..3171d6e0ad45 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25420,7 +25420,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case MAX_EXPR: if (fp) { - if (X87_FLOAT_MODE_P (mode)) + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) /* x87 requires conditional branch. We don't have cost for that. */ ; @@ -25457,7 +25458,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case ABSU_EXPR: if (fp) { - if (X87_FLOAT_MODE_P (mode)) + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) /* fabs. */ stmt_cost = ix86_cost->fabs; else
[gcc r16-164] Refactor msse4 and mno-sse4.
https://gcc.gnu.org/g:d85444a3b00c9a6fce56459af5ec081439a9aaa0 commit r16-164-gd85444a3b00c9a6fce56459af5ec081439a9aaa0 Author: liuhongt Date: Tue Apr 1 00:30:07 2025 -0700 Refactor msse4 and mno-sse4. gcc/ChangeLog: PR target/119549 * common/config/i386/i386-common.cc (ix86_handle_option): Refactor msse4 and mno-sse4. * config/i386/i386.opt (msse4): Remove RejectNegative. (mno-sse4): Remove the entry. * config/i386/i386-options.cc (ix86_valid_target_attribute_inner_p): Remove special code which handles mno-sse4. Diff: --- gcc/common/config/i386/i386-common.cc | 23 --- gcc/config/i386/i386-options.cc | 7 --- gcc/config/i386/i386.opt | 6 +- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 4815fbc4d359..296df3b32304 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -1519,17 +1519,18 @@ ix86_handle_option (struct gcc_options *opts, return true; case OPT_msse4: - gcc_assert (value != 0); - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; - return true; - -case OPT_mno_sse4: - gcc_assert (value != 0); - opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; - opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET; - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET; + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET; + } return true; case OPT_msse4a: diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 
964449fa8cd7..45aa9b4b732f 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1271,13 +1271,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], } } - /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */ - if (opt == OPT_msse4 && !opt_set_p) - { - opt = OPT_mno_sse4; - opt_set_p = true; - } - /* Process the option. */ if (opt == N_OPTS) { diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 27d34bd64eaa..0abf13480f57 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -721,13 +721,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. msse4 -Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save +Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. -mno-sse4 -Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save -Do not support SSE4.1 and SSE4.2 built-in functions and code generation. - msse5 Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed) ;; Deprecated