Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
On 05/03/2017 01:10 AM, Uros Bizjak wrote: The order of subexpressions of parallel in general does not matter. Thanks, this makes things much clearer. Also, I'm wondering if there's anything wrong with calling ix86_gen_leave () and plucking the insns out of the generated parallel insn and moving that into my own parallel rather than generating them in my own function. I guess all the matters is what is cleanest. Hm... I'd rather see subexpressions generated "by hand". OK. While we're on the topic, are you OK with my changes to ix86_emit_leave to generate the notes or would you prefer those by hand as well? Also, are these predicates what you had in mind? (I haven't actually tested them just yet.) (define_predicate "save_multiple" (match_code "parallel") { const unsigned len = XVECLEN (op, 0); unsigned i; /* Starting from end of vector, count register saves. */ for (i = 0; i < len; ++i) { rtx src, dest, addr; rtx e = XVECEXP (op, 0, len - 1 - i); if (GET_CODE (e) != SET) break; src = SET_SRC (e); dest = SET_DEST (e); if (!REG_P (src) || !MEM_P (dest)) break; addr = XEXP (dest, 0); /* Good if dest address is in RAX. */ if (REG_P (addr) && REGNO (addr) == AX_REG) continue; /* Good if dest address is offset of RAX. */ if (GET_CODE (addr) == PLUS && REG_P (XEXP (addr, 0)) && REGNO (XEXP (addr, 0)) == AX_REG) continue; break; } return (i >= 12 && i <= 18); }) (define_predicate "restore_multiple" (match_code "parallel") { const unsigned len = XVECLEN (op, 0); unsigned i; /* Starting from end of vector, count register restores. */ for (i = 0; i < len; ++i) { rtx src, dest, addr; rtx e = XVECEXP (op, 0, len - 1 - i); if (GET_CODE (e) != SET) break; src = SET_SRC (e); dest = SET_DEST (e); if (!MEM_P (src) || !REG_P (dest)) break; addr = XEXP (src, 0); /* Good if src address is in RSI. */ if (REG_P (addr) && REGNO (addr) == SI_REG) continue; /* Good if src address is offset of RSI. */ if (GET_CODE (addr) == PLUS && REG_P (XEXP (addr, 0)) && REGNO (XEXP (addr, 0)) == SI_REG) continue; break; } return (i >= 12 && i <= 18); }) Thanks, Daniel
[testsuite, committed] Add quotes to numerical comment arg of dg directive
Hi, this patch adds quotes to the comment argument of a dg-message directive when the comment is a plain number, to avoid confusion with line numbers. Committed as obvious. Thanks, - Tom Add quotes to numerical comment arg of dg directive 2017-05-01 Tom de Vries * c-c++-common/goacc/data-default-1.c: Add quotes to numerical comment arg of dg directive. * c-c++-common/goacc/routine-3.c: Same. * c-c++-common/goacc/routine-4.c: Same. --- gcc/testsuite/c-c++-common/goacc/data-default-1.c | 4 ++-- gcc/testsuite/c-c++-common/goacc/routine-3.c | 4 ++-- gcc/testsuite/c-c++-common/goacc/routine-4.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gcc/testsuite/c-c++-common/goacc/data-default-1.c b/gcc/testsuite/c-c++-common/goacc/data-default-1.c index 631032e..ec53cbe 100644 --- a/gcc/testsuite/c-c++-common/goacc/data-default-1.c +++ b/gcc/testsuite/c-c++-common/goacc/data-default-1.c @@ -6,13 +6,13 @@ int main () int n = 2; int ary[2]; -#pragma acc parallel default (none) /* { dg-message "'parallel' construct" 2 } */ +#pragma acc parallel default (none) /* { dg-message "'parallel' construct" "2" } */ { ary[0] /* { dg-error "not specified in enclosing" } */ = n; /* { dg-error "not specified in enclosing" } */ } -#pragma acc kernels default (none) /* { dg-message "'kernels' construct" 2 } */ +#pragma acc kernels default (none) /* { dg-message "'kernels' construct" "2" } */ { ary[0] /* { dg-error "not specified in enclosing" } */ = n; /* { dg-error "not specified in enclosing" } */ diff --git a/gcc/testsuite/c-c++-common/goacc/routine-3.c b/gcc/testsuite/c-c++-common/goacc/routine-3.c index b322d26..eaea470 100644 --- a/gcc/testsuite/c-c++-common/goacc/routine-3.c +++ b/gcc/testsuite/c-c++-common/goacc/routine-3.c @@ -2,7 +2,7 @@ #pragma acc routine gang int -gang () /* { dg-message "declared here" 3 } */ +gang () /* { dg-message "declared here" "3" } */ { #pragma acc loop gang worker vector for (int i = 0; i < 10; i++) @@ -14,7 +14,7 @@ gang () /* { dg-message "declared here" 3 } */ #pragma acc routine worker int -worker () /* { dg-message "declared here" 2 } */ +worker () /* { dg-message "declared here" "2" } */ { #pragma acc loop worker vector for (int i = 0; i < 10; i++) diff --git a/gcc/testsuite/c-c++-common/goacc/routine-4.c b/gcc/testsuite/c-c++-common/goacc/routine-4.c index 3e5fc4f..efc4a0b 100644 --- a/gcc/testsuite/c-c++-common/goacc/routine-4.c +++ b/gcc/testsuite/c-c++-common/goacc/routine-4.c @@ -35,7 +35,7 @@ void seq (void) red ++; } -void vector (void) /* { dg-message "declared here" 1 } */ +void vector (void) /* { dg-message "declared here" "1" } */ { gang (); /* { dg-error "routine call uses" } */ worker (); /* { dg-error "routine call uses" } */ @@ -61,7 +61,7 @@ void vector (void) /* { dg-message "declared here" 1 } */ red ++; } -void worker (void) /* { dg-message "declared here" 2 } */ +void worker (void) /* { dg-message "declared here" "2" } */ { gang (); /* { dg-error "routine call uses" } */ worker (); @@ -87,7 +87,7 @@ void worker (void) /* { dg-message "declared here" 2 } */ red ++; } -void gang (void) /* { dg-message "declared here" 3 } */ +void gang (void) /* { dg-message "declared here" "3" } */ { gang (); worker ();
[testsuite, committed] Replace absolute line numbers in c-c++-common
Hi, this patch replaces absolute line numbers in the c-c++-common directory. Committed as obvious. Thanks, - Tom Replace absolute line numbers in c-c++-common 2017-05-01 Tom de Vries PR testsuite/80557 * c-c++-common/Wshift-negative-value-1.c: Replace absolute line numbers. * c-c++-common/Wshift-negative-value-2.c: Same. * c-c++-common/Wshift-negative-value-3.c: Same. * c-c++-common/Wshift-negative-value-4.c: Same. * c-c++-common/cilk-plus/AN/pr57541.c: Same. * c-c++-common/cpp/pr60400.c: Same. * c-c++-common/fmax-errors.c: Same. * c-c++-common/goacc/data-2.c: Same. * c-c++-common/goacc/host_data-2.c: Same. * c-c++-common/gomp/simd4.c: Same. * c-c++-common/pr28656.c: Same. * c-c++-common/pr43395.c: Same. * c-c++-common/torture/pr57945.c: Same. --- gcc/testsuite/c-c++-common/Wshift-negative-value-1.c | 3 +-- gcc/testsuite/c-c++-common/Wshift-negative-value-2.c | 5 ++--- gcc/testsuite/c-c++-common/Wshift-negative-value-3.c | 5 ++--- gcc/testsuite/c-c++-common/Wshift-negative-value-4.c | 5 ++--- gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c| 2 +- gcc/testsuite/c-c++-common/cpp/pr60400.c | 8 gcc/testsuite/c-c++-common/fmax-errors.c | 2 +- gcc/testsuite/c-c++-common/goacc/data-2.c| 2 +- gcc/testsuite/c-c++-common/goacc/host_data-2.c | 8 gcc/testsuite/c-c++-common/gomp/simd4.c | 4 ++-- gcc/testsuite/c-c++-common/pr28656.c | 14 +++--- gcc/testsuite/c-c++-common/pr43395.c | 12 ++-- gcc/testsuite/c-c++-common/torture/pr57945.c | 3 +-- 13 files changed, 34 insertions(+), 39 deletions(-) diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c index 8f14034..7df1804 100644 --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c @@ -7,6 +7,7 @@ enum E { A = 0 << 1, B = 1 << 1, C = -1 << 1, /* { dg-warning "left shift of negative value|not an integer constant" } */ + /* { dg-error "left operand of shift expression" "shift" { target c++ } .-1 } */ D = 0 >> 1, E = 1 >> 1, F = -1 >> 1 @@ -47,5 +48,3 @@ right (int x) r += -1U >> x; return r; } - -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */ diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c index 55523a5..3a60ed7 100644 --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c @@ -7,6 +7,8 @@ enum E { A = 0 << 1, B = 1 << 1, C = -1 << 1, /* { dg-warning "left shift of negative value" } */ + /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */ + /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */ D = 0 >> 1, E = 1 >> 1, F = -1 >> 1 @@ -47,6 +49,3 @@ right (int x) r += -1U >> x; return r; } - -/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */ -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */ diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c index 1295b72..503ca61 100644 --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c @@ -7,6 +7,8 @@ enum E { A = 0 << 1, B = 1 << 1, C = -1 << 1, + /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */ + /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */ D = 0 >> 1, E = 1 >> 1, F = -1 >> 1 @@ -47,6 +49,3 @@ right (int x) r += -1U >> x; return r; } - -/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */ -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */ diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c index 3088220..fa7cb4e 100644 --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c @@ -7,6 +7,8 @@ enum E { A = 0 << 1, B = 1 << 1, C = -1 << 1, + /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */ + /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */ D = 0 >> 1, E = 1 >> 1, F = -1 >> 1 @@ -47,6 +49,3 @@ right (int x) r += -1U >> x; return r; } - -/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */ -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */ diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c index f379e46..a956d0e 100755 --- a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c +++ b/gcc/testsuite/c-c++-common/cilk-p
[committed] Wrap tree-data-ref.h macro arguments
Tested on aarch64-linux-gnu and x86_64-linux-gnu. Installed as obvious. Thanks, Richard gcc/ 2016-05-03 Richard Sandiford * tree-data-ref.h (SUB_CONFLICTS_IN_A): Wrap SUB argument in brackets. (SUB_CONFLICTS_IN_B, SUB_LAST_CONFLICT, SUB_DISTANCE): Likewise. (DDR_A): Wrap DDR argument in brackets. (DDR_B, DDR_AFFINE_P, DDR_ARE_DEPENDENT, DDR_SUBSCRIPTS): Likewise. (DDR_LOOP_NEST, DDR_INNER_LOOP, DDR_SELF_REFERENCE): Likewise. (DDR_REVERSED_P): Likewise. Index: gcc/tree-data-ref.h === --- gcc/tree-data-ref.h 2017-02-23 19:54:15.0 + +++ gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100 @@ -209,10 +209,10 @@ struct subscript typedef struct subscript *subscript_p; -#define SUB_CONFLICTS_IN_A(SUB) SUB->conflicting_iterations_in_a -#define SUB_CONFLICTS_IN_B(SUB) SUB->conflicting_iterations_in_b -#define SUB_LAST_CONFLICT(SUB) SUB->last_conflict -#define SUB_DISTANCE(SUB) SUB->distance +#define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_iterations_in_a +#define SUB_CONFLICTS_IN_B(SUB) (SUB)->conflicting_iterations_in_b +#define SUB_LAST_CONFLICT(SUB) (SUB)->last_conflict +#define SUB_DISTANCE(SUB) (SUB)->distance /* A data_dependence_relation represents a relation between two data_references A and B. */ @@ -268,20 +268,20 @@ struct data_dependence_relation typedef struct data_dependence_relation *ddr_p; -#define DDR_A(DDR) DDR->a -#define DDR_B(DDR) DDR->b -#define DDR_AFFINE_P(DDR) DDR->affine_p -#define DDR_ARE_DEPENDENT(DDR) DDR->are_dependent -#define DDR_SUBSCRIPTS(DDR) DDR->subscripts +#define DDR_A(DDR) (DDR)->a +#define DDR_B(DDR) (DDR)->b +#define DDR_AFFINE_P(DDR) (DDR)->affine_p +#define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent +#define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () -#define DDR_LOOP_NEST(DDR) DDR->loop_nest +#define DDR_LOOP_NEST(DDR) (DDR)->loop_nest /* The size of the direction/distance vectors: the number of loops in the loop nest. */ #define DDR_NB_LOOPS(DDR) (DDR_LOOP_NEST (DDR).length ()) -#define DDR_INNER_LOOP(DDR) DDR->inner_loop -#define DDR_SELF_REFERENCE(DDR) DDR->self_reference_p +#define DDR_INNER_LOOP(DDR) (DDR)->inner_loop +#define DDR_SELF_REFERENCE(DDR) (DDR)->self_reference_p #define DDR_DIST_VECTS(DDR) ((DDR)->dist_vects) #define DDR_DIR_VECTS(DDR) ((DDR)->dir_vects) @@ -293,7 +293,7 @@ #define DDR_DIR_VECT(DDR, I) \ DDR_DIR_VECTS (DDR)[I] #define DDR_DIST_VECT(DDR, I) \ DDR_DIST_VECTS (DDR)[I] -#define DDR_REVERSED_P(DDR) DDR->reversed_p +#define DDR_REVERSED_P(DDR) (DDR)->reversed_p bool dr_analyze_innermost (struct data_reference *, struct loop *);
Alternative check for vector refs with same alignment
vect_find_same_alignment_drs uses the ddr dependence distance to tell whether two references have the same alignment. Although that's safe with the current code, there's no particular reason why a dependence distance of 0 should mean that the accesses start on the same byte. E.g. a reference to a full complex value could in principle depend on a reference to the imaginary component. A later patch adds support for this kind of dependence. On the other side, checking modulo vf is pessimistic when the step divided by the element size is a factor of 2. This patch instead looks for cases in which the drs have the same base, offset and step, and for which the difference in their constant initial values is a multiple of the alignment. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Thanks, Richard gcc/ 2017-05-03 Richard Sandiford * tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove loop_vinfo argument and use of dependence distance vectors. Check instead whether the two references differ only in their initial value and assume that they have the same alignment if the difference is a multiple of the vector alignment. (vect_analyze_data_refs_alignment): Update call accordingly. gcc/testsuite/ * gcc.dg/vect/vect-103.c: Update wording of dump message. Index: gcc/tree-vect-data-refs.c === --- gcc/tree-vect-data-refs.c 2017-04-18 19:52:35.060164268 +0100 +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:30.536704993 +0100 @@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v vectorization factor. */ static void -vect_find_same_alignment_drs (struct data_dependence_relation *ddr, - loop_vec_info loop_vinfo) +vect_find_same_alignment_drs (struct data_dependence_relation *ddr) { - unsigned int i; - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct data_reference *dra = DDR_A (ddr); struct data_reference *drb = DDR_B (ddr); stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); - int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra; - int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb; - lambda_vector dist_v; - unsigned int loop_depth; if (DDR_ARE_DEPENDENT (ddr) == chrec_known) return; @@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat if (dra == drb) return; - if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) -return; - - /* Loop-based vectorization and known data dependence. */ - if (DDR_NUM_DIST_VECTS (ddr) == 0) -return; - - /* Data-dependence analysis reports a distance vector of zero - for data-references that overlap only in the first iteration - but have different sign step (see PR45764). - So as a sanity check require equal DR_STEP. */ - if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) + if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb), + OEP_ADDRESS_OF) + || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0) + || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) return; - loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); - FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) + /* Two references with distance zero have the same alignment. */ + offset_int diff = (wi::to_offset (DR_INIT (dra)) +- wi::to_offset (DR_INIT (drb))); + if (diff != 0) { - int dist = dist_v[loop_depth]; + /* Get the wider of the two alignments. */ + unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_a)); + unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_b)); + unsigned int max_align = MAX (align_a, align_b); + + /* Require the gap to be a multiple of the larger vector alignment. */ + if (!wi::multiple_of_p (diff, max_align, SIGNED)) + return; +} - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, -"dependence distance = %d.\n", dist); - - /* Same loop iteration. */ - if (dist == 0 - || (dist % vectorization_factor == 0 && dra_size == drb_size)) - { - /* Two references with distance zero have the same alignment. */ - STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb); - STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra); - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "accesses have the same alignment.\n"); - dump_printf (MSG_NOTE, - "dependence distance modulo vf == 0 between "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra)); - d
Handle data dependence relations with different bases
This patch tries to calculate conservatively-correct distance vectors for two references whose base addresses are not the same. It sets a new flag DDR_COULD_BE_INDEPENDENT_P if the dependence isn't guaranteed to occur. The motivating example is: struct s { int x[8]; }; void f (struct s *a, struct s *b) { for (int i = 0; i < 8; ++i) a->x[i] += b->x[i]; } in which the "a" and "b" accesses are either independent or have a dependence distance of 0 (assuming -fstrict-aliasing). Neither case prevents vectorisation, so we can vectorise without an alias check. I'd originally wanted to do the same thing for arrays as well, e.g.: void f (int a[][8], struct b[][8]) { for (int i = 0; i < 8; ++i) a[0][i] += b[0][i]; } I think this is valid because C11 6.7.6.2/6 says: For two array types to be compatible, both shall have compatible element types, and if both size specifiers are present, and are integer constant expressions, then both size specifiers shall have the same constant value. So if we access an array through an int (*)[8], it must have type X[8] or X[], where X is compatible with int. It doesn't seem possible in either case for "a[0]" and "b[0]" to overlap when "a != b". However, Richard B said that (at least in gimple) we support arbitrary overlap of arrays and allow arrays to be accessed with different dimensionality. There are examples of this in PR50067. I've therefore only handled references that end in a structure field access. There are two ways of handling these dependences in the vectoriser: use them to limit VF, or check at runtime as before. I've gone for the approach of checking at runtime if we can, to avoid limiting VF unnecessarily. We still fall back to a VF cap when runtime checks aren't allowed. The patch tests whether we queued an alias check with a dependence distance of X and then picked a VF <= X, in which case it's safe to drop the alias check. Since vect_prune_runtime_alias_check_list can be called twice with different VF for the same loop, it's no longer safe to clear may_alias_ddrs on exit. Instead we should use comp_alias_ddrs to check whether versioning is necessary. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Thanks, Richard gcc/ 2017-05-03 Richard Sandiford * tree-data-ref.h (subscript): Add access_fn field. (data_dependence_relation): Add could_be_independent_p. (SUB_ACCESS_FN, DDR_COULD_BE_INDEPENDENT_P): New macros. (same_access_functions): Move to tree-data-ref.c. * tree-data-ref.c (ref_contains_union_access_p): New function. (dump_data_dependence_relation): Use SUB_ACCESS_FN instead of DR_ACCESS_FN. (constant_access_functions): Likewise. (add_other_self_distances): Likewise. (same_access_functions): Likewise. (Moved from tree-data-ref.h.) (initialize_data_dependence_relation): Use XCNEW and remove explicit zeroing of DDR_REVERSED_P. Look for a subsequence of access functions that have the same type. Allow the subsequence to end with different bases in some circumstances. Record the chosen access functions in SUB_ACCESS_FN. (build_classic_dist_vector_1): Replace ddr_a and ddr_b with a_index and b_index. Use SUB_ACCESS_FN instead of DR_ACCESS_FN. (subscript_dependence_tester_1): Likewise dra and drb. (build_classic_dist_vector): Update calls accordingly. (subscript_dependence_tester): Likewise. * tree-ssa-loop-prefetch.c (determine_loop_nest_reuse): Check DDR_COULD_BE_INDEPENDENT_P. * tree-vectorizer.h (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Test comp_alias_ddrs instead of may_alias_ddrs. * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Try to mark for aliasing if DDR_COULD_BE_INDEPENDENT_P, but fall back to using the recorded distance vectors if that fails. (dependence_distance_ge_vf): New function. (vect_prune_runtime_alias_test_list): Use it. Don't clear LOOP_VINFO_MAY_ALIAS_DDRS. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-3.c: New test. * gcc.dg/vect/vect-alias-check-4.c: Likewise. * gcc.dg/vect/vect-alias-check-5.c: Likewise. Index: gcc/tree-data-ref.h === --- gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100 +++ gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 @@ -191,6 +191,9 @@ struct conflict_function struct subscript { + /* The access functions of the two references. */ + tree access_fn[2]; + /* A description of the iterations for which the elements are accessed twice. */ conflict_function *conflicting_iterations_in_a; @@ -209,6 +212,7 @@ struct subscript typedef struct subscript *subscript_p; +#define SUB_ACCESS_FN(SUB, I) (SUB)->access_fn[I] #define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_ite
Use base inequality for some vector alias checks
This patch checks whether two data references x and y cannot partially overlap and so are independent whenever &x != &y. We can then use this in the vectoriser to optimise alias checks. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Thanks, Richard gcc/ 2016-05-03 Richard Sandiford * hash-traits.h (pair_hash): New struct. * tree-data-ref.h (data_dependence_relation): Add object_a and object_b fields. (DDR_OBJECT_A, DDR_OBJECT_B): New macros. * tree-data-ref.c (initialize_data_dependence_relation): Initialize DDR_OBJECT_A and DDR_OBJECT_B. * tree-vectorizer.h (vec_object_pair): New type. (_loop_vec_info): Add a check_unequal_addrs field. (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro. (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an entry in check_unequal_addrs. Check comp_alias_ddrs instead of may_alias_ddrs. * tree-vect-loop.c (destroy_loop_vec_info): Release LOOP_VINFO_CHECK_UNEQUAL_ADDRS. (vect_analyze_loop_2): Likewise, when restarting. (vect_estimate_min_profitable_iters): Estimate the cost of LOOP_VINFO_CHECK_UNEQUAL_ADDRS. * tree-vect-data-refs.c: Include tree-hash-traits.h. (vect_prune_runtime_alias_test_list): Try to handle conflicts using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows. Count such tests in the final summary. * tree-vect-loop-manip.c (chain_cond_expr): New function. (vect_create_cond_for_align_checks): Use it. (vect_create_cond_for_alias_checks): Likewise. (vect_create_cond_for_unequal_addrs): New function. (vect_loop_versioning): Call it. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-6.c: New test. Index: gcc/hash-traits.h === --- gcc/hash-traits.h 2017-02-23 19:54:15.0 + +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100 @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash struct nofree_string_hash : string_hash, typed_noop_remove {}; +/* Traits for pairs of values, using the first to record empty and + deleted slots. */ + +template +struct pair_hash +{ + typedef std::pair value_type; + typedef std::pair compare_type; + + static inline hashval_t hash (const value_type &); + static inline bool equal (const value_type &, const compare_type &); + static inline void remove (value_type &); + static inline void mark_deleted (value_type &); + static inline void mark_empty (value_type &); + static inline bool is_deleted (const value_type &); + static inline bool is_empty (const value_type &); +}; + +template +inline hashval_t +pair_hash ::hash (const value_type &x) +{ + return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second)); +} + +template +inline bool +pair_hash ::equal (const value_type &x, const compare_type &y) +{ + return T1::equal (x.first, y.first) && T2::equal (x.second, y.second); +} + +template +inline void +pair_hash ::remove (value_type &x) +{ + T1::remove (x.first); + T2::remove (x.second); +} + +template +inline void +pair_hash ::mark_deleted (value_type &x) +{ + T1::mark_deleted (x.first); +} + +template +inline void +pair_hash ::mark_empty (value_type &x) +{ + T1::mark_empty (x.first); +} + +template +inline bool +pair_hash ::is_deleted (const value_type &x) +{ + return T1::is_deleted (x.first); +} + +template +inline bool +pair_hash ::is_empty (const value_type &x) +{ + return T1::is_empty (x.first); +} + template struct default_hash_traits : T {}; template Index: gcc/tree-data-ref.h === --- gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 +++ gcc/tree-data-ref.h 2017-05-03 08:48:53.313041828 +0100 @@ -240,6 +240,13 @@ struct data_dependence_relation but the analyzer cannot be more specific. */ tree are_dependent; + /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are + independent when the runtime addresses of OBJECT_A and OBJECT_B + are different. The addresses of both objects are invariant in the + loop nest. */ + tree object_a; + tree object_b; + /* For each subscript in the dependence test, there is an element in this array. This is the attribute that labels the edge A->B of the data_dependence_relation. */ @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a #define DDR_B(DDR) (DDR)->b #define DDR_AFFINE_P(DDR) (DDR)->affine_p #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent +#define DDR_OBJECT_A(DDR) (DDR)->object_a +#define DDR_OBJECT_B(DDR) (DDR)->object_b #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () Index: gcc/tree-data-ref.c ===
Re: [PATCH] Optimize in VRP loads from constant arrays (take 2)
On Tue, May 02, 2017 at 02:50:16PM +0200, Richard Biener wrote: > > If array_at_struct_end_p is wrong, it should be fixed ;) > > Indeed. It was originally meant to say false if you can trust > TYPE_DOMAIN of the array but now it says false if there's some means > to constrain the array size (the DECL_P path and now your STRING_CST > one). But all callers afterwards just look at TYPE_DOMAIN ... So shall we verify that TYPE_DOMAIN is consistent with the object size in that case inside of array_at_struct_end_p? > > > I'd restructure the patch quite different, using for_each_index on the > > > ref gather an array of index pointers (bail out on sth unhandled). > > > Then I'd see if I have interesting ranges for them, if not, bail out. > > > Also compute the size product of all ranges and test that against > > > PARAM_MAX_VRP_CONSTANT_ARRAY_LOADS. Then store the minimum range > > > value in the index places (temporarily) and use get_base_ref_and_extent to > > > get at the constant "starting" offset. From there iterate using > > > the remembered indices (remember the ref tree as well via for_each_index), > > > directly adjusting the constant offset so you can feed > > > fold_ctor_reference the constant offsets of all elements that need to > > > be considered. As optimization fold_ctor_reference would know how > > > to start from the "last" offset (much refactoring would need to be > > > done here given nested ctors and multiple indices I guess). > > > > But for this, don't you want to take it over? > > I can try. Is there a PR for this? Ok, filed PR80603, it is now all yours. > > I agree that the current implementation is not very efficient and that is > > why it is also limited to that small number of iterations. > > As many cases just aren't able to use the valueize callback, handling > > arbitrary numbers of non-constant indexes would be harder. > > Sure. I'd have expected you simply handle ARRAY_REF of a VAR_DECL > and nothing else ;) That would be too simple and boring ;) Jakub
[RFC][PATCH] Introduce -fdump*-folding
Hello Last release cycle I spent quite some time with reading of IVOPTS pass dump file. Using -fdump*-details causes to generate a lot of 'Applying pattern' lines, which can make reading of a dump file more complicated. There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage number shows how many lines are of the aforementioned pattern: tramp3d-v4.cpp.164t.ivopts: 6.34% tramp3d-v4.cpp.091t.ccp2: 5.04% tramp3d-v4.cpp.093t.cunrolli: 4.41% tramp3d-v4.cpp.129t.laddress: 3.70% tramp3d-v4.cpp.032t.ccp1: 2.31% tramp3d-v4.cpp.038t.evrp: 1.90% tramp3d-v4.cpp.033t.forwprop1: 1.74% tramp3d-v4.cpp.103t.vrp1: 1.52% tramp3d-v4.cpp.124t.forwprop3: 1.31% tramp3d-v4.cpp.181t.vrp2: 1.30% tramp3d-v4.cpp.161t.cunroll: 1.22% tramp3d-v4.cpp.027t.fixup_cfg3: 1.11% tramp3d-v4.cpp.153t.ivcanon: 1.07% tramp3d-v4.cpp.126t.ccp3: 0.96% tramp3d-v4.cpp.143t.sccp: 0.91% tramp3d-v4.cpp.185t.forwprop4: 0.82% tramp3d-v4.cpp.011t.cfg: 0.74% tramp3d-v4.cpp.096t.forwprop2: 0.50% tramp3d-v4.cpp.019t.fixup_cfg1: 0.37% tramp3d-v4.cpp.120t.phicprop1: 0.33% tramp3d-v4.cpp.133t.pre: 0.32% tramp3d-v4.cpp.182t.phicprop2: 0.27% tramp3d-v4.cpp.170t.veclower21: 0.25% tramp3d-v4.cpp.029t.einline: 0.24% I'm suggesting to add new TDF that will be allocated for that. Patch can bootstrap on ppc64le-redhat-linux and survives regression tests. Thoughts? Martin >From c1b832212576fd9f89fd738ae0cc98e9fb189c1d Mon Sep 17 00:00:00 2001 From: marxin Date: Wed, 1 Feb 2017 15:34:52 +0100 Subject: [PATCH] Introduce -fdump*-folding gcc/ChangeLog: 2017-05-03 Martin Liska * dumpfile.c: Add TDF_FOLDING. * dumpfile.h (enum tree_dump_index): Add to the enum. * genmatch.c (dt_simplify::gen_1): Use the newly added enum value. --- gcc/dumpfile.c | 1 + gcc/dumpfile.h | 7 --- gcc/genmatch.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c index 6b9a47c5a26..b9c881c103f 100644 --- a/gcc/dumpfile.c +++ b/gcc/dumpfile.c @@ -111,6 +111,7 @@ static const struct dump_option_value_info dump_options[] = {"enumerate_locals", TDF_ENUMERATE_LOCALS}, {"scev", TDF_SCEV}, {"gimple", TDF_GIMPLE}, + {"folding", TDF_FOLDING}, {"optimized", MSG_OPTIMIZED_LOCATIONS}, {"missed", MSG_MISSED_OPTIMIZATION}, {"note", MSG_NOTE}, diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h index fef58f5e9b1..69c4ec0f861 100644 --- a/gcc/dumpfile.h +++ b/gcc/dumpfile.h @@ -84,9 +84,10 @@ enum tree_dump_index #define TDF_SCEV (1 << 24) /* Dump SCEV details. */ #define TDF_COMMENT (1 << 25) /* Dump lines with prefix ";;" */ #define TDF_GIMPLE (1 << 26) /* Dump in GIMPLE FE syntax */ -#define MSG_OPTIMIZED_LOCATIONS (1 << 27) /* -fopt-info optimized sources */ -#define MSG_MISSED_OPTIMIZATION (1 << 28) /* missed opportunities */ -#define MSG_NOTE (1 << 29) /* general optimization info */ +#define TDF_FOLDING (1 << 27) /* Dump folding details. */ +#define MSG_OPTIMIZED_LOCATIONS (1 << 28) /* -fopt-info optimized sources */ +#define MSG_MISSED_OPTIMIZATION (1 << 29) /* missed opportunities */ +#define MSG_NOTE (1 << 30) /* general optimization info */ #define MSG_ALL (MSG_OPTIMIZED_LOCATIONS | MSG_MISSED_OPTIMIZATION \ | MSG_NOTE) diff --git a/gcc/genmatch.c b/gcc/genmatch.c index 5621aa05b59..979d6856084 100644 --- a/gcc/genmatch.c +++ b/gcc/genmatch.c @@ -3187,7 +3187,7 @@ dt_simplify::gen_1 (FILE *f, int indent, bool gimple, operand *result) } } - fprintf_indent (f, indent, "if (dump_file && (dump_flags & TDF_DETAILS)) " + fprintf_indent (f, indent, "if (dump_file && (dump_flags & TDF_FOLDING)) " "fprintf (dump_file, \"Applying pattern "); output_line_directive (f, result ? result->location : s->match->location, true); -- 2.12.2
Re: [RFC][PATCH] Introduce -fdump*-folding
On Wed, May 3, 2017 at 1:10 AM, Martin Liška wrote: > Hello > > Last release cycle I spent quite some time with reading of IVOPTS pass > dump file. Using -fdump*-details causes to generate a lot of 'Applying > pattern' > lines, which can make reading of a dump file more complicated. Yes the folding message can get annoying. Especially when it is not clear what is being folded which is the case a lot of the time I have seen the message. Thanks, Andrew > > There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage > number > shows how many lines are of the aforementioned pattern: > > tramp3d-v4.cpp.164t.ivopts: 6.34% > tramp3d-v4.cpp.091t.ccp2: 5.04% > tramp3d-v4.cpp.093t.cunrolli: 4.41% > tramp3d-v4.cpp.129t.laddress: 3.70% > tramp3d-v4.cpp.032t.ccp1: 2.31% > tramp3d-v4.cpp.038t.evrp: 1.90% > tramp3d-v4.cpp.033t.forwprop1: 1.74% > tramp3d-v4.cpp.103t.vrp1: 1.52% > tramp3d-v4.cpp.124t.forwprop3: 1.31% > tramp3d-v4.cpp.181t.vrp2: 1.30% >tramp3d-v4.cpp.161t.cunroll: 1.22% > tramp3d-v4.cpp.027t.fixup_cfg3: 1.11% >tramp3d-v4.cpp.153t.ivcanon: 1.07% > tramp3d-v4.cpp.126t.ccp3: 0.96% > tramp3d-v4.cpp.143t.sccp: 0.91% > tramp3d-v4.cpp.185t.forwprop4: 0.82% >tramp3d-v4.cpp.011t.cfg: 0.74% > tramp3d-v4.cpp.096t.forwprop2: 0.50% > tramp3d-v4.cpp.019t.fixup_cfg1: 0.37% > tramp3d-v4.cpp.120t.phicprop1: 0.33% >tramp3d-v4.cpp.133t.pre: 0.32% > tramp3d-v4.cpp.182t.phicprop2: 0.27% > tramp3d-v4.cpp.170t.veclower21: 0.25% >tramp3d-v4.cpp.029t.einline: 0.24% > > I'm suggesting to add new TDF that will be allocated for that. > Patch can bootstrap on ppc64le-redhat-linux and survives regression tests. > > Thoughts? > Martin
[PATCH] Improve vectorizer peeling for alignment costmodel
The following extends the very simplistic cost modeling I added somewhen late in the release process to, for all unknown misaligned refs, also apply this model for loops containing stores. The model basically says it's useless to peel for alignment if there's only a single DR that is affected or if, in case we'll end up using hw-supported misaligned loads, the cost of misaligned loads is the same as of aligned ones. Previously we'd usually align one of the stores with the theory that this improves (precious) store-bandwith. Note this is only a so slightly conservative (aka less peeling). We'll still apply peeling for alignment if you make the testcase use += because then we'll align both the load and the store from v1. Bootstrap / regtest running on x86_64-unknown-linux-gnu. Richard. 2017-05-03 Richard Biener * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): When all DRs have unknown misaligned do not always peel when there is a store but apply the same costing model as if there were only loads. * gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c: New testcase. Index: gcc/tree-vect-data-refs.c === --- gcc/tree-vect-data-refs.c (revision 247498) +++ gcc/tree-vect-data-refs.c (working copy) @@ -1715,18 +1741,18 @@ vect_enhance_data_refs_alignment (loop_v dr0 = first_store; } - /* In case there are only loads with different unknown misalignments, use - peeling only if it may help to align other accesses in the loop or + /* Use peeling only if it may help to align other accesses in the loop or if it may help improving load bandwith when we'd end up using unaligned loads. */ tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0))); - if (!first_store - && !STMT_VINFO_SAME_ALIGN_REFS ( - vinfo_for_stmt (DR_STMT (dr0))).length () + if (STMT_VINFO_SAME_ALIGN_REFS + (vinfo_for_stmt (DR_STMT (dr0))).length () == 0 && (vect_supportable_dr_alignment (dr0, false) != dr_unaligned_supported - || (builtin_vectorization_cost (vector_load, dr0_vt, 0) - == builtin_vectorization_cost (unaligned_load, dr0_vt, -1 + || (DR_IS_READ (dr0) + && (builtin_vectorization_cost (vector_load, dr0_vt, 0) + == builtin_vectorization_cost (unaligned_load, +dr0_vt, -1) do_peeling = false; } Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c === --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c (working copy) @@ -0,0 +1,9 @@ +/* { dg-do compile } */ + +void func(double * __restrict__ v1, double * v2, unsigned n) +{ + for (unsigned i = 0; i < n; ++i) +v1[i] = v2[i]; +} + +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */
Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges
On Wed, May 3, 2017 at 9:38 AM, Daniel Santos wrote: > On 05/03/2017 01:10 AM, Uros Bizjak wrote: >> >> The order of subexpressions of parallel in general does not matter. > > > Thanks, this makes things much clearer. > >>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave >>> () >>> and plucking the insns out of the generated parallel insn and moving that >>> into my own parallel rather than generating them in my own function. I >>> guess all the matters is what is cleanest. >> >> Hm... I'd rather see subexpressions generated "by hand". > > > OK. While we're on the topic, are you OK with my changes to ix86_emit_leave > to generate the notes or would you prefer those by hand as well? I think they are OK. We are effectively emitting a leave here. > Also, are these predicates what you had in mind? (I haven't actually tested > them just yet.) Yes, these look good to me. Uros. > (define_predicate "save_multiple" > (match_code "parallel") > { > const unsigned len = XVECLEN (op, 0); > unsigned i; > > /* Starting from end of vector, count register saves. */ > for (i = 0; i < len; ++i) > { > rtx src, dest, addr; > rtx e = XVECEXP (op, 0, len - 1 - i); > > if (GET_CODE (e) != SET) > break; > > src = SET_SRC (e); > dest = SET_DEST (e); > > if (!REG_P (src) || !MEM_P (dest)) > break; > > addr = XEXP (dest, 0); > > /* Good if dest address is in RAX. */ > if (REG_P (addr) && REGNO (addr) == AX_REG) > continue; > > /* Good if dest address is offset of RAX. */ > if (GET_CODE (addr) == PLUS > && REG_P (XEXP (addr, 0)) > && REGNO (XEXP (addr, 0)) == AX_REG) > continue; > > break; > } > return (i >= 12 && i <= 18); > }) > > > (define_predicate "restore_multiple" > (match_code "parallel") > { > const unsigned len = XVECLEN (op, 0); > unsigned i; > > /* Starting from end of vector, count register restores. */ > for (i = 0; i < len; ++i) > { > rtx src, dest, addr; > rtx e = XVECEXP (op, 0, len - 1 - i); > > if (GET_CODE (e) != SET) > break; > > src = SET_SRC (e); > dest = SET_DEST (e); > > if (!MEM_P (src) || !REG_P (dest)) > break; > > addr = XEXP (src, 0); > > /* Good if src address is in RSI. */ > if (REG_P (addr) && REGNO (addr) == SI_REG) > continue; > > /* Good if src address is offset of RSI. */ > if (GET_CODE (addr) == PLUS > && REG_P (XEXP (addr, 0)) > && REGNO (XEXP (addr, 0)) == SI_REG) > continue; > > break; > } > return (i >= 12 && i <= 18); > }) > > > Thanks, > Daniel >
Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost
On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou wrote: >> 2017-04-11 Bin Cheng >> >> * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. > > This breaks bootstrap with RTL checking: > > /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ - > nostdinc -x c /dev/null -S -o /dev/null -fself- > test=/home/eric/svn/gcc/gcc/testsuite/selftests > cc1: internal compiler error: RTL check: expected code 'subreg', have > 'truncate' in rtx_cost, at rtlanal.c:4169 > 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, > char const*) > /home/eric/svn/gcc/gcc/rtl.c:829 > 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool) > /home/eric/svn/gcc/gcc/rtlanal.c:4169 > 0x8517e6 set_src_cost > /home/eric/svn/gcc/gcc/rtl.h:2685 > 0x8517e6 init_expmed_one_conv > /home/eric/svn/gcc/gcc/expmed.c:142 > 0x8517e6 init_expmed_one_mode > /home/eric/svn/gcc/gcc/expmed.c:209 > 0x853fb2 init_expmed() > /home/eric/svn/gcc/gcc/expmed.c:270 > 0xc45974 backend_init_target > /home/eric/svn/gcc/gcc/toplev.c:1665 > 0xc45974 initialize_rtl() > Sorry for disturbing, I will revert this if can't fix today. Thanks, bin > -- > Eric Botcazou
Re: [PATCH] Allow building GCC with PTX offloading even without CUDA being installed (gcc and nvptx-tools patches)
On Sat, Jan 21, 2017 at 03:50:43PM +0100, Thomas Schwinge wrote: > > In order to configure gcc to load libcuda.so.1 dynamically, > > one has to either configure it --without-cuda-driver, or without > > --with-cuda-driver=/--with-cuda-driver-lib=/--with-cuda-driver-include= > > options if cuda.h and -lcuda aren't found in the default locations. > > Would be good to have that documented ;-) -- done. > > > The nvptx-tools change > > (I'll get to that later.) I'd like to ping the nvptx-tools change. Shall I make a github pull request for that? I have additional following two further patches, the first one just to shut up -Wformat-security warning, the other one discovered today to fix build against glibc trunk - they have changed getopt related includes there and we get: In file included from /usr/include/bits/getopt_posix.h:27:0, from /usr/include/unistd.h:872, from ../nvptx-ld.c:23: /usr/include/bits/getopt_core.h:91:12: error: declaration of 'int getopt(int, char* const*, const char*) throw ()' has a different exception specifier extern int getopt (int ___argc, char *const *___argv, const char *__shortopts) ^~ In file included from ../nvptx-ld.c:22:0: ../include/getopt.h:113:12: note: from previous declaration 'int getopt(int, char* const*, const char*)' extern int getopt (int argc, char *const *argv, const char *shortopts); ^~ Jakub --- nvptx-tools/configure.ac +++ nvptx-tools/configure.ac @@ -51,6 +51,7 @@ LIBS="$LIBS -lcuda" AC_CHECK_FUNCS([[cuGetErrorName] [cuGetErrorString]]) AC_CHECK_DECLS([[cuGetErrorName], [cuGetErrorString]], [], [], [[#include ]]) +AC_CHECK_HEADERS(unistd.h sys/stat.h) AC_MSG_CHECKING([for extra programs to build requiring -lcuda]) NVPTX_RUN= --- nvptx-tools/include/libiberty.h +++ nvptx-tools/include/libiberty.h @@ -390,6 +390,17 @@ extern void hex_init (void); /* Save files used for communication between processes. */ #define PEX_SAVE_TEMPS 0x4 +/* Max number of alloca bytes per call before we must switch to malloc. + + ?? Swiped from gnulib's regex_internal.h header. Is this actually + the case? This number seems arbitrary, though sane. + + The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +#define MAX_ALLOCA_SIZE4032 + /* Prepare to execute one or more programs, with standard output of each program fed to standard input of the next. FLAGS As above. --- nvptx-tools/nvptx-as.c +++ nvptx-tools/nvptx-as.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef HAVE_SYS_STAT_H +#include +#endif #include #define obstack_chunk_alloc malloc #define obstack_chunk_free free @@ -42,6 +45,38 @@ #include "version.h" +#ifndef R_OK +#define R_OK 4 +#define W_OK 2 +#define X_OK 1 +#endif + +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +#endif + +#if defined (_WIN32) || defined (__MSDOS__) \ +|| defined (__DJGPP__) || defined (__OS2__) +# define HAVE_DOS_BASED_FILE_SYSTEM +# define HAVE_HOST_EXECUTABLE_SUFFIX +# define HOST_EXECUTABLE_SUFFIX ".exe" +# ifndef DIR_SEPARATOR_2 +#define DIR_SEPARATOR_2 '\\' +# endif +# define PATH_SEPARATOR ';' +#else +# define PATH_SEPARATOR ':' +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif + +#define DIR_UP ".." + static const char *outname = NULL; static void __attribute__ ((format (printf, 1, 2))) @@ -816,7 +851,7 @@ traverse (void **slot, void *data) } static void -process (FILE *in, FILE *out) +process (FILE *in, FILE *out, int verify, const char *outname) { symbol_table = htab_create (500, hash_string_hash, hash_string_eq, NULL); @@ -824,6 +859,18 @@ process (FILE *in, FILE *out) const char *input = read_file (in); Token *tok = tokenize (input); + /* By default, when ptxas is not in PATH, do minimalistic verification, + just require that the first non-comment directive is .version. */ + if (verify < 0) +{ + size_t i; + for (i = 0; tok[i].kind == K_comment; i++) + ; + if (tok[i].kind != K_dotted || !is_keyword (&tok[i], "version")) + fatal_error ("missing .version directive at start of file '%s'", +outname); +} + do tok = parse_file (tok); while (tok->kind); @@ -897,9 +944,83 @@ fork_execute (const char *prog, char *const *argv) do_wait (prog, pex); } +/* Determine if progname is available in PATH. */ +static bool +program_available (const char *progname) +{ + char *temp = getenv ("PATH"); + if (temp) +{ + char *startp, *endp, *nstore, *alloc_ptr = NULL; + size_t
Re: [gcn][patch] Add -mgpu option and plumb in assembler/linker
On 02/05/17 18:08, Martin Jambor wrote: Hi Andrew, sorry for replying only now but yesterday was public holiday here and I am still only in the process of recovering from a long weekend. No problem, the UK had the same. :-) While the only objection I have is the C++ style comment in config/gcn/gcn.c, another problem, for me at least... On Fri, Apr 28, 2017 at 06:06:39PM +0100, Andrew Stubbs wrote: This patch, for the "gcn" branch, does three things: 1. Add specs to drive the LLVM assembler and linker. It requires them to be installed as "as" and "ld", under $target/bin, but then the compiler Just Works with these specs. ...is that I do not have llvm linker at hand and without it I did not manage to make the patch produce loadable code. Because ROCm 1.5 has been released today, I will update our environment, which is a bit obsolete, get llvm ld and try again. This might take me a few days, so please bear with me for a little more, I would like to make sure it works on carrizos. Understood. I do not have a carrizo to test on. All my testing will be with FuryX Fiji discrete GPUs. The main difference, from the software point of view, is that the shared memory must be handled a little differently. (The existing HSA back-end appears incompatible, as are the samples from the HSA Foundation sources; the ROCm samples work fine.) However, the HSACO binary must encode the right magic numbers or the driver rejects it, hence the need for this patch. Andrew
[PATCH] Revert part of PR80492
This change unnecessarily pessimizes some cases. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2017-05-03 Richard Biener Revert PR tree-optimization/80492 * tree-ssa-alias.c (decl_refs_may_alias_p): Handle compare_base_decls returning dont-know properly. Index: gcc/tree-ssa-alias.c === --- gcc/tree-ssa-alias.c(revision 247492) +++ gcc/tree-ssa-alias.c(working copy) @@ -1096,16 +1096,13 @@ decl_refs_may_alias_p (tree ref1, tree b { gcc_checking_assert (DECL_P (base1) && DECL_P (base2)); - int cmp = compare_base_decls (base1, base2); - /* If both references are based on different variables, they cannot alias. */ - if (cmp == 0) + if (compare_base_decls (base1, base2) == 0) return false; /* If both references are based on the same variable, they cannot alias if the accesses do not overlap. */ - if (cmp == 1 - && !ranges_overlap_p (offset1, max_size1, offset2, max_size2)) + if (!ranges_overlap_p (offset1, max_size1, offset2, max_size2)) return false; /* For components with variable position, the above test isn't sufficient,
Re: [PATCH, GCC/ARM, stage4] Set mode for success result of atomic compare and swap
Hi Kyrill, On 19/04/17 14:34, Kyrill Tkachov wrote: Hi Thomas, On 12/04/17 09:59, Thomas Preudhomme wrote: Hi, Currently atomic_compare_and_swap_1 define_insn do not have a mode set for the destination of the set indicating the success result of the instruction. This is because the operand can be either a CC_Z register (for 32-bit targets) or a SI register (for 16-bit Thumb targets). This result in lack of checking for the mode. This commit use a new CCSI iterator to solve this issue while avoiding duplication of the patterns. The insn name are kept unique by using attributes tied to the iterator (SIDI:mode and CCSI:arch) instead of usign the builtin mode attribute. Expander arm_expand_compare_and_swap is also adapted accordingly. ChangeLog entry is as follows: *** gcc/ChangeLog *** 2017-04-11 Thomas Preud'homme * config/arm/iterators.md (CCSI): New mode iterator. (arch): New mode attribute. * config/arm/sync.md (atomic_compare_and_swap_1): Rename into ... (atomic_compare_and_swap_1): This and ... (atomic_compare_and_swap_1): This. Use CCSI code iterator for success result mode. * config/arm/arm.c (arm_expand_compare_and_swap): Adapt code to use the corresponding new insn generators. Testing: arm-none-eabi cross-compiler built successfully for ARMv8-M Mainline and Baseline without the lack of destination mode warning in sync.md. Testsuite show no regression. Thanks for fixing these warnings. The code looks ok to me but I'd like to make sure that the rest of the arm atomic targets are not adversely affected, so please also do a test run for ARMv7-A and ARMv8-A targets. Also, a bootstrap is required as always. Hi Kyrill, Bootstrapped and ran the testsuite for both ARMv7-A and ARMv8-A in both ARM and Thumb mode without any regression. I've also verified that a number of atomic related testcases [1][2] get the same code generation for ARMv7-A in ARM and Thumb mode as well as ARMv8-M Baseline. [1] For ARMv7-A ARM and Thumb mode, the following testcases were considered: gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c gcc/testsuite/gcc.dg/atomic-exchange-1.c gcc/testsuite/gcc.dg/atomic-exchange-2.c gcc/testsuite/gcc.dg/atomic-exchange-3.c gcc/testsuite/gcc.dg/atomic-fence.c gcc/testsuite/gcc.dg/atomic-flag.c gcc/testsuite/gcc.dg/atomic-generic.c gcc/testsuite/gcc.dg/atomic-generic-aux.c gcc/testsuite/gcc.dg/atomic-invalid-2.c gcc/testsuite/gcc.dg/atomic-load-1.c gcc/testsuite/gcc.dg/atomic-load-2.c gcc/testsuite/gcc.dg/atomic-load-3.c gcc/testsuite/gcc.dg/atomic-lockfree.c gcc/testsuite/gcc.dg/atomic-lockfree-aux.c gcc/testsuite/gcc.dg/atomic-noinline.c gcc/testsuite/gcc.dg/atomic-noinline-aux.c gcc/testsuite/gcc.dg/atomic-op-1.c gcc/testsuite/gcc.dg/atomic-op-2.c gcc/testsuite/gcc.dg/atomic-op-3.c gcc/testsuite/gcc.dg/atomic-op-6.c gcc/testsuite/gcc.dg/atomic-store-1.c gcc/testsuite/gcc.dg/atomic-store-2.c gcc/testsuite/gcc.dg/atomic-store-3.c gcc/testsuite/g++.dg/ext/atomic-1.C gcc/testsuite/g++.dg/ext/atomic-2.C gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-1.c gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-1.c gcc/testsuite/gcc.target/arm/atomic-op-acquire-1.c gcc/testsuite/gcc.target/arm/atomic-op-char-1.c gcc/testsuite/gcc.target/arm/atomic-op-consume-1.c gcc/testsuite/gcc.target/arm/atomic-op-int-1.c gcc/testsuite/gcc.target/arm/atomic-op-relaxed-1.c gcc/testsuite/gcc.target/arm/atomic-op-release-1.c gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-1.c gcc/testsuite/gcc.target/arm/atomic-op-short-1.c gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c gcc/testsuite/gcc.target/arm/sync-1.c gcc/testsuite/gcc.target/arm/synchronize.c gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c libstdc++-v3/testsuite/29_atomics/atomic/60658.cc libstdc++-v3/testsuite/29_atomics/atomic/62259.cc libstdc++-v3/testsuite/29_atomics/atomic/64658.cc libstdc++-v3/testsuite/29_atomics/atomic/65147.cc libstdc++-v3/testsuite/29_atomics/atomic/65913.cc libstdc++-v3/testsuite/29_atomics/atomic/70766.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_assignment.cc libstdc++-v3/testsuite/29_a
Re: [PATCH, GCC/ARM, stage4] Set mode for success result of atomic compare and swap
Hi Thomas, On 03/05/17 10:39, Thomas Preudhomme wrote: Hi Kyrill, On 19/04/17 14:34, Kyrill Tkachov wrote: Hi Thomas, On 12/04/17 09:59, Thomas Preudhomme wrote: Hi, Currently atomic_compare_and_swap_1 define_insn do not have a mode set for the destination of the set indicating the success result of the instruction. This is because the operand can be either a CC_Z register (for 32-bit targets) or a SI register (for 16-bit Thumb targets). This result in lack of checking for the mode. This commit use a new CCSI iterator to solve this issue while avoiding duplication of the patterns. The insn name are kept unique by using attributes tied to the iterator (SIDI:mode and CCSI:arch) instead of usign the builtin mode attribute. Expander arm_expand_compare_and_swap is also adapted accordingly. ChangeLog entry is as follows: *** gcc/ChangeLog *** 2017-04-11 Thomas Preud'homme * config/arm/iterators.md (CCSI): New mode iterator. (arch): New mode attribute. * config/arm/sync.md (atomic_compare_and_swap_1): Rename into ... (atomic_compare_and_swap_1): This and ... (atomic_compare_and_swap_1): This. Use CCSI code iterator for success result mode. * config/arm/arm.c (arm_expand_compare_and_swap): Adapt code to use the corresponding new insn generators. Testing: arm-none-eabi cross-compiler built successfully for ARMv8-M Mainline and Baseline without the lack of destination mode warning in sync.md. Testsuite show no regression. Thanks for fixing these warnings. The code looks ok to me but I'd like to make sure that the rest of the arm atomic targets are not adversely affected, so please also do a test run for ARMv7-A and ARMv8-A targets. Also, a bootstrap is required as always. Hi Kyrill, Bootstrapped and ran the testsuite for both ARMv7-A and ARMv8-A in both ARM and Thumb mode without any regression. I've also verified that a number of atomic related testcases [1][2] get the same code generation for ARMv7-A in ARM and Thumb mode as well as ARMv8-M Baseline. [1] For ARMv7-A ARM and Thumb mode, the following testcases were considered: gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c gcc/testsuite/gcc.dg/atomic-exchange-1.c gcc/testsuite/gcc.dg/atomic-exchange-2.c gcc/testsuite/gcc.dg/atomic-exchange-3.c gcc/testsuite/gcc.dg/atomic-fence.c gcc/testsuite/gcc.dg/atomic-flag.c gcc/testsuite/gcc.dg/atomic-generic.c gcc/testsuite/gcc.dg/atomic-generic-aux.c gcc/testsuite/gcc.dg/atomic-invalid-2.c gcc/testsuite/gcc.dg/atomic-load-1.c gcc/testsuite/gcc.dg/atomic-load-2.c gcc/testsuite/gcc.dg/atomic-load-3.c gcc/testsuite/gcc.dg/atomic-lockfree.c gcc/testsuite/gcc.dg/atomic-lockfree-aux.c gcc/testsuite/gcc.dg/atomic-noinline.c gcc/testsuite/gcc.dg/atomic-noinline-aux.c gcc/testsuite/gcc.dg/atomic-op-1.c gcc/testsuite/gcc.dg/atomic-op-2.c gcc/testsuite/gcc.dg/atomic-op-3.c gcc/testsuite/gcc.dg/atomic-op-6.c gcc/testsuite/gcc.dg/atomic-store-1.c gcc/testsuite/gcc.dg/atomic-store-2.c gcc/testsuite/gcc.dg/atomic-store-3.c gcc/testsuite/g++.dg/ext/atomic-1.C gcc/testsuite/g++.dg/ext/atomic-2.C gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-1.c gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-1.c gcc/testsuite/gcc.target/arm/atomic-op-acquire-1.c gcc/testsuite/gcc.target/arm/atomic-op-char-1.c gcc/testsuite/gcc.target/arm/atomic-op-consume-1.c gcc/testsuite/gcc.target/arm/atomic-op-int-1.c gcc/testsuite/gcc.target/arm/atomic-op-relaxed-1.c gcc/testsuite/gcc.target/arm/atomic-op-release-1.c gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-1.c gcc/testsuite/gcc.target/arm/atomic-op-short-1.c gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c gcc/testsuite/gcc.target/arm/sync-1.c gcc/testsuite/gcc.target/arm/synchronize.c gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c libstdc++-v3/testsuite/29_atomics/atomic/60658.cc libstdc++-v3/testsuite/29_atomics/atomic/62259.cc libstdc++-v3/testsuite/29_atomics/atomic/64658.cc libstdc++-v3/testsuite/29_atomics/atomic/65147.cc libstdc++-v3/testsuite/29_atomics/atomic/65913.cc libstdc++-v3/testsuite/29_atomics/atomic/70766.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc libstdc++-v3/testsuite/29_atomics/atomic/operators/inte
[PATCH] Doxygen: add default location for filters and output folder.
On 04/28/2017 02:03 PM, Martin Liška wrote: > Why do we not make a default for OUTPUT_DIRECTORY and INPUT_FILTER ? > I would expect people are running doxygen from GCC root folder. Hi. I'm suggesting following patch for that. Martin >From 4623de7cf43598de0f778dc976a3b219220716e3 Mon Sep 17 00:00:00 2001 From: marxin Date: Wed, 3 May 2017 11:42:41 +0200 Subject: [PATCH] Doxygen: add default location for filters and output folder. contrib/ChangeLog: 2017-05-03 Martin Liska * gcc.doxy: Add default location for filters and output folder. --- contrib/gcc.doxy | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/contrib/gcc.doxy b/contrib/gcc.doxy index 7a284e754aa..a8eeb03c9a0 100644 --- a/contrib/gcc.doxy +++ b/contrib/gcc.doxy @@ -11,16 +11,12 @@ # Values that contain spaces should be placed between quotes (" ") -#- -# NOTE: YOU MUST EDIT THE FOLLOWING HARDWIRED PATHS BEFORE USING THIS FILE. -#- - # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. -OUTPUT_DIRECTORY = @OUTPUT_DIRECTORY@ +OUTPUT_DIRECTORY = gcc-doxygen # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program @@ -30,7 +26,7 @@ OUTPUT_DIRECTORY = @OUTPUT_DIRECTORY@ # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. -INPUT_FILTER = @INPUT_FILTER@ +INPUT_FILTER = contrib/filter_gcc_for_doxygen #- -- 2.12.2
Re: [PATCH GCC8][07/33]Offset validity check in address expression
On Tue, May 2, 2017 at 7:06 PM, Bin.Cheng wrote: > On Mon, Apr 24, 2017 at 11:34 AM, Richard Biener > wrote: >> On Tue, Apr 18, 2017 at 12:41 PM, Bin Cheng wrote: >>> Hi, >>> For now, we check validity of offset by computing the maximum offset then >>> checking if >>> offset is smaller than the max offset. This is inaccurate, for example, >>> some targets >>> may require offset to be aligned by power of 2. This patch introduces new >>> interface >>> checking validity of offset. It also buffers rtx among different calls. >>> >>> Is it OK? >> >> - static vec max_offset_list; >> - >> + auto_vec addr_list; >>as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base)); >>mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p)); >> >> - num = max_offset_list.length (); >> + num = addr_list.length (); >>list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode; >>if (list_index >= num) >> >> num here is always zero and thus the compare is always true. >> >> + addr_list.safe_grow_cleared (list_index + MAX_MACHINE_MODE); >> + for (; num < addr_list.length (); num++) >> + addr_list[num] = NULL; >> >> the loop is now redundant (safe_grow_cleared) >> >> + addr = addr_list[list_index]; >> + if (!addr) >> { >> >> always true again... >> >> I wonder if you really indented to drop 'static' from addr_list? >> There's no caching >> across function calls. > Right, the redundancy is because I tried to cache across function > calls with declarations like: > static unsigned num = 0; > static GTY ((skip)) rtx *addr_list = NULL; > But this doesn't work, the addr_list[list_index] still gets corrupted somehow. Well, you need GTY (()), not GTY((skip)) on them. Not sure if it works for function-scope decls, you have to check. Look at whether a GC root is created for the variable in gt-tree-ssa-loop-ivopts.h (need to tweak GTFILES in the makefile plus include that generated file). tree-ssa-address.c uses a global root for mem_addr_template_list for example. Richard. > Thanks, > bin >> >> + /* Split group if aksed to, or the offset against the first >> +use can't fit in offset part of addressing mode. IV uses >> +having the same offset are still kept in one group. */ >> + if (offset != 0 && >> + (split_p || !addr_offset_valid_p (use, offset))) >> >> && goes to the next line. >> >> Richard. >> >> >> >>> Thanks, >>> bin >>> 2017-04-11 Bin Cheng >>> >>> * tree-ssa-loop-ivopts.c (compute_max_addr_offset): Delete. >>> (addr_offset_valid_p): New function. >>> (split_address_groups): Check offset validity with above function.
Re: [PATCH] ggc-page loop
On Wed, May 3, 2017 at 2:09 AM, Andrew Pinski wrote: > On Tue, May 2, 2017 at 3:41 PM, Nathan Sidwell wrote: >> This loop in ggc-page confused me, because the iterator is one greater than >> the indexing value. Also the formatting of the array indexing is incorrect. >> >> Fixed thusly, and applied as obvious after booting on x86_64-linux-gnu > > - for (i = G.by_depth_in_use; i > 0; --i) > + for (unsigned i = G.by_depth_in_use; i--;) > { > - page_entry *p = G.by_depth[i-1]; > - p->index_by_depth = i-1; > + page_entry *p = G.by_depth[i]; > + p->index_by_depth = i; > } > > I think this is still incorrect. you replaced i - 1 by i but did not > start at G.by_depth_in_use - 1; He does, albeit in a coding way I dislike (watch how the iterator update is done in the condition!) Richard. > > Thanks, > Andrew > > >> >> nathan >> -- >> Nathan Sidwell
Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost
On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng wrote: > On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou wrote: >>> 2017-04-11 Bin Cheng >>> >>> * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. >> >> This breaks bootstrap with RTL checking: >> >> /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ - >> nostdinc -x c /dev/null -S -o /dev/null -fself- >> test=/home/eric/svn/gcc/gcc/testsuite/selftests >> cc1: internal compiler error: RTL check: expected code 'subreg', have >> 'truncate' in rtx_cost, at rtlanal.c:4169 >> 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, >> char const*) >> /home/eric/svn/gcc/gcc/rtl.c:829 >> 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool) >> /home/eric/svn/gcc/gcc/rtlanal.c:4169 >> 0x8517e6 set_src_cost >> /home/eric/svn/gcc/gcc/rtl.h:2685 >> 0x8517e6 init_expmed_one_conv >> /home/eric/svn/gcc/gcc/expmed.c:142 >> 0x8517e6 init_expmed_one_mode >> /home/eric/svn/gcc/gcc/expmed.c:209 >> 0x853fb2 init_expmed() >> /home/eric/svn/gcc/gcc/expmed.c:270 >> 0xc45974 backend_init_target >> /home/eric/svn/gcc/gcc/toplev.c:1665 >> 0xc45974 initialize_rtl() >> > Sorry for disturbing, I will revert this if can't fix today. It looks bogus and I couldn't find the motivating case for it, so revert with attached patch. Build on x86 and commit as obvious. Thanks, bin 2017-05-03 Bin Cheng Revert 2017-05-02 Bin Cheng * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index f18245f..321363f 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -4164,14 +4164,6 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code outer_code, return COSTS_N_INSNS (2 + factor); break; -case TRUNCATE: - /* If we can tie these modes, make this cheap. */ - if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x - { - total = 0; - break; - } - /* FALLTHRU */ default: if (targetm.rtx_costs (x, mode, outer_code, opno, &total, speed)) return total;
Re: Alternative check for vector refs with same alignment
On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford wrote: > vect_find_same_alignment_drs uses the ddr dependence distance > to tell whether two references have the same alignment. Although > that's safe with the current code, there's no particular reason > why a dependence distance of 0 should mean that the accesses start > on the same byte. E.g. a reference to a full complex value could > in principle depend on a reference to the imaginary component. > A later patch adds support for this kind of dependence. > > On the other side, checking modulo vf is pessimistic when the step > divided by the element size is a factor of 2. > > This patch instead looks for cases in which the drs have the same > base, offset and step, and for which the difference in their constant > initial values is a multiple of the alignment. I'm a bit wary about trusting operand_equal_p over dependence analysis. So, did you (can you) add an assert that the new code computes same alignment in all cases where the old one did? > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Otherwise looks ok. Thanks, Richard. > Thanks, > Richard > > > gcc/ > 2017-05-03 Richard Sandiford > > * tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove > loop_vinfo argument and use of dependence distance vectors. > Check instead whether the two references differ only in their > initial value and assume that they have the same alignment if the > difference is a multiple of the vector alignment. > (vect_analyze_data_refs_alignment): Update call accordingly. > > gcc/testsuite/ > * gcc.dg/vect/vect-103.c: Update wording of dump message. > > Index: gcc/tree-vect-data-refs.c > === > --- gcc/tree-vect-data-refs.c 2017-04-18 19:52:35.060164268 +0100 > +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:30.536704993 +0100 > @@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v > vectorization factor. */ > > static void > -vect_find_same_alignment_drs (struct data_dependence_relation *ddr, > - loop_vec_info loop_vinfo) > +vect_find_same_alignment_drs (struct data_dependence_relation *ddr) > { > - unsigned int i; > - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); >struct data_reference *dra = DDR_A (ddr); >struct data_reference *drb = DDR_B (ddr); >stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); >stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); > - int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra; > - int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb; > - lambda_vector dist_v; > - unsigned int loop_depth; > >if (DDR_ARE_DEPENDENT (ddr) == chrec_known) > return; > @@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat >if (dra == drb) > return; > > - if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) > -return; > - > - /* Loop-based vectorization and known data dependence. */ > - if (DDR_NUM_DIST_VECTS (ddr) == 0) > -return; > - > - /* Data-dependence analysis reports a distance vector of zero > - for data-references that overlap only in the first iteration > - but have different sign step (see PR45764). > - So as a sanity check require equal DR_STEP. */ > - if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) > + if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb), > + OEP_ADDRESS_OF) > + || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0) > + || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) > return; > > - loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); > - FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) > + /* Two references with distance zero have the same alignment. */ > + offset_int diff = (wi::to_offset (DR_INIT (dra)) > +- wi::to_offset (DR_INIT (drb))); > + if (diff != 0) > { > - int dist = dist_v[loop_depth]; > + /* Get the wider of the two alignments. */ > + unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE > (stmtinfo_a)); > + unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE > (stmtinfo_b)); > + unsigned int max_align = MAX (align_a, align_b); > + > + /* Require the gap to be a multiple of the larger vector alignment. */ > + if (!wi::multiple_of_p (diff, max_align, SIGNED)) > + return; > +} > > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_NOTE, vect_location, > -"dependence distance = %d.\n", dist); > - > - /* Same loop iteration. */ > - if (dist == 0 > - || (dist % vectorization_factor == 0 && dra_size == drb_size)) > - { > - /* Two references with distance zero have the same alignment. */ > - STMT
Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost
Hi Bin, On 03/05/17 11:02, Bin.Cheng wrote: On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng wrote: On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou wrote: 2017-04-11 Bin Cheng * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. This breaks bootstrap with RTL checking: /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ - nostdinc -x c /dev/null -S -o /dev/null -fself- test=/home/eric/svn/gcc/gcc/testsuite/selftests cc1: internal compiler error: RTL check: expected code 'subreg', have 'truncate' in rtx_cost, at rtlanal.c:4169 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, char const*) /home/eric/svn/gcc/gcc/rtl.c:829 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool) /home/eric/svn/gcc/gcc/rtlanal.c:4169 0x8517e6 set_src_cost /home/eric/svn/gcc/gcc/rtl.h:2685 0x8517e6 init_expmed_one_conv /home/eric/svn/gcc/gcc/expmed.c:142 0x8517e6 init_expmed_one_mode /home/eric/svn/gcc/gcc/expmed.c:209 0x853fb2 init_expmed() /home/eric/svn/gcc/gcc/expmed.c:270 0xc45974 backend_init_target /home/eric/svn/gcc/gcc/toplev.c:1665 0xc45974 initialize_rtl() Sorry for disturbing, I will revert this if can't fix today. It looks bogus and I couldn't find the motivating case for it, so revert with attached patch. Build on x86 and commit as obvious. Thanks, bin 2017-05-03 Bin Cheng Revert 2017-05-02 Bin Cheng * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. Looking at the code in the patch... +case TRUNCATE: + /* If we can tie these modes, make this cheap. */ + if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x 'code' here is GET_CODE (x) and in this case it is TRUNCATE. SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so passing it a TRUNCATE rtx would cause the checking failure Eric reported. I think you meant to use XEXP (x, 0) instead of SUBREG_REG (x) ? Thanks, Kyrill
Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost
On Wed, May 3, 2017 at 11:09 AM, Kyrill Tkachov wrote: > Hi Bin, > > > On 03/05/17 11:02, Bin.Cheng wrote: >> >> On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng wrote: >>> >>> On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou >>> wrote: > > 2017-04-11 Bin Cheng > >* rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. This breaks bootstrap with RTL checking: /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ - nostdinc -x c /dev/null -S -o /dev/null -fself- test=/home/eric/svn/gcc/gcc/testsuite/selftests cc1: internal compiler error: RTL check: expected code 'subreg', have 'truncate' in rtx_cost, at rtlanal.c:4169 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, char const*) /home/eric/svn/gcc/gcc/rtl.c:829 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool) /home/eric/svn/gcc/gcc/rtlanal.c:4169 0x8517e6 set_src_cost /home/eric/svn/gcc/gcc/rtl.h:2685 0x8517e6 init_expmed_one_conv /home/eric/svn/gcc/gcc/expmed.c:142 0x8517e6 init_expmed_one_mode /home/eric/svn/gcc/gcc/expmed.c:209 0x853fb2 init_expmed() /home/eric/svn/gcc/gcc/expmed.c:270 0xc45974 backend_init_target /home/eric/svn/gcc/gcc/toplev.c:1665 0xc45974 initialize_rtl() >>> Sorry for disturbing, I will revert this if can't fix today. >> >> It looks bogus and I couldn't find the motivating case for it, so >> revert with attached patch. Build on x86 and commit as obvious. >> >> Thanks, >> bin >> 2017-05-03 Bin Cheng >> >> Revert >> 2017-05-02 Bin Cheng >> * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. > > > Looking at the code in the patch... > > +case TRUNCATE: > + /* If we can tie these modes, make this cheap. */ > + if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x > > 'code' here is GET_CODE (x) and in this case it is TRUNCATE. > SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so > passing it a TRUNCATE rtx would cause > the checking failure Eric reported. I think you meant to use XEXP (x, 0) > instead of SUBREG_REG (x) ? Yes, I guess so. Reverted since I couldn't find the original test. Thanks, bin > > Thanks, > Kyrill >
Re: [RFC][PATCH] Introduce -fdump*-folding
On Wed, May 3, 2017 at 10:10 AM, Martin Liška wrote: > Hello > > Last release cycle I spent quite some time with reading of IVOPTS pass > dump file. Using -fdump*-details causes to generate a lot of 'Applying > pattern' > lines, which can make reading of a dump file more complicated. > > There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage > number > shows how many lines are of the aforementioned pattern: > > tramp3d-v4.cpp.164t.ivopts: 6.34% > tramp3d-v4.cpp.091t.ccp2: 5.04% > tramp3d-v4.cpp.093t.cunrolli: 4.41% > tramp3d-v4.cpp.129t.laddress: 3.70% > tramp3d-v4.cpp.032t.ccp1: 2.31% > tramp3d-v4.cpp.038t.evrp: 1.90% > tramp3d-v4.cpp.033t.forwprop1: 1.74% > tramp3d-v4.cpp.103t.vrp1: 1.52% > tramp3d-v4.cpp.124t.forwprop3: 1.31% > tramp3d-v4.cpp.181t.vrp2: 1.30% >tramp3d-v4.cpp.161t.cunroll: 1.22% > tramp3d-v4.cpp.027t.fixup_cfg3: 1.11% >tramp3d-v4.cpp.153t.ivcanon: 1.07% > tramp3d-v4.cpp.126t.ccp3: 0.96% > tramp3d-v4.cpp.143t.sccp: 0.91% > tramp3d-v4.cpp.185t.forwprop4: 0.82% >tramp3d-v4.cpp.011t.cfg: 0.74% > tramp3d-v4.cpp.096t.forwprop2: 0.50% > tramp3d-v4.cpp.019t.fixup_cfg1: 0.37% > tramp3d-v4.cpp.120t.phicprop1: 0.33% >tramp3d-v4.cpp.133t.pre: 0.32% > tramp3d-v4.cpp.182t.phicprop2: 0.27% > tramp3d-v4.cpp.170t.veclower21: 0.25% >tramp3d-v4.cpp.029t.einline: 0.24% > > I'm suggesting to add new TDF that will be allocated for that. > Patch can bootstrap on ppc64le-redhat-linux and survives regression tests. > > Thoughts? Ok. Soon we'll want to change dump_flags to uint64_t ... (we have 1 bit left if you allow negative dump_flags). It'll tickle down on a lot of interfaces so introducing dump_flags_t at the same time might be a good idea. Thanks, Richard. > Martin
Re: Alternative check for vector refs with same alignment
On Wed, May 3, 2017 at 11:07 AM, Richard Biener wrote: > On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford > wrote: >> vect_find_same_alignment_drs uses the ddr dependence distance >> to tell whether two references have the same alignment. Although >> that's safe with the current code, there's no particular reason >> why a dependence distance of 0 should mean that the accesses start >> on the same byte. E.g. a reference to a full complex value could >> in principle depend on a reference to the imaginary component. >> A later patch adds support for this kind of dependence. >> >> On the other side, checking modulo vf is pessimistic when the step >> divided by the element size is a factor of 2. >> >> This patch instead looks for cases in which the drs have the same >> base, offset and step, and for which the difference in their constant >> initial values is a multiple of the alignment. > > I'm a bit wary about trusting operand_equal_p over dependence analysis. > So, did you (can you) add an assert that the new code computes > same alignment in all cases where the old one did? At the moment operand_equal_p method looks more powerful than dependence analysis, for example, it can handle the same memory reference with pointer/array_ref forms like in PR65206. However, given dependence check is not expensive here, is it possible to build the patch on top of it when it fails? Thanks, bin > >> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? > > Otherwise looks ok. > > Thanks, > Richard. > >> Thanks, >> Richard >> >> >> gcc/ >> 2017-05-03 Richard Sandiford >> >> * tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove >> loop_vinfo argument and use of dependence distance vectors. >> Check instead whether the two references differ only in their >> initial value and assume that they have the same alignment if the >> difference is a multiple of the vector alignment. >> (vect_analyze_data_refs_alignment): Update call accordingly. >> >> gcc/testsuite/ >> * gcc.dg/vect/vect-103.c: Update wording of dump message. >> >> Index: gcc/tree-vect-data-refs.c >> === >> --- gcc/tree-vect-data-refs.c 2017-04-18 19:52:35.060164268 +0100 >> +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:30.536704993 +0100 >> @@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v >> vectorization factor. */ >> >> static void >> -vect_find_same_alignment_drs (struct data_dependence_relation *ddr, >> - loop_vec_info loop_vinfo) >> +vect_find_same_alignment_drs (struct data_dependence_relation *ddr) >> { >> - unsigned int i; >> - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); >> - int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); >>struct data_reference *dra = DDR_A (ddr); >>struct data_reference *drb = DDR_B (ddr); >>stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); >>stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); >> - int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra; >> - int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb; >> - lambda_vector dist_v; >> - unsigned int loop_depth; >> >>if (DDR_ARE_DEPENDENT (ddr) == chrec_known) >> return; >> @@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat >>if (dra == drb) >> return; >> >> - if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) >> -return; >> - >> - /* Loop-based vectorization and known data dependence. */ >> - if (DDR_NUM_DIST_VECTS (ddr) == 0) >> -return; >> - >> - /* Data-dependence analysis reports a distance vector of zero >> - for data-references that overlap only in the first iteration >> - but have different sign step (see PR45764). >> - So as a sanity check require equal DR_STEP. */ >> - if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) >> + if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb), >> + OEP_ADDRESS_OF) >> + || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0) >> + || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0)) >> return; >> >> - loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); >> - FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) >> + /* Two references with distance zero have the same alignment. */ >> + offset_int diff = (wi::to_offset (DR_INIT (dra)) >> +- wi::to_offset (DR_INIT (drb))); >> + if (diff != 0) >> { >> - int dist = dist_v[loop_depth]; >> + /* Get the wider of the two alignments. */ >> + unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE >> (stmtinfo_a)); >> + unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE >> (stmtinfo_b)); >> + unsigned int max_align = MAX (align_a, align_b); >> + >> + /* Require the gap to be a multiple of the larger vector alignment.
Re: [PATCH] handle sprintf(d, "%s", ...) in gimple-ssa-sprintf.c
On Tue, May 2, 2017 at 4:41 PM, Martin Sebor wrote: FWIW, my fix for bug 79062 is only partial (it gets the pass to run but the warnings are still not issued). I don't quite understand what prevents the warning flag(s) from getting set when -flto is used. This seems to be a bigger problem than just the sprintf pass not doing something just right. >>> >>> >>> I've never dug deeply in the LTO stuff, but I believe we stream the >>> compiler >>> flags, so it could be something there. >> >> >> We do. >> >>> Alternately you might be running into a case where in LTO mode we >>> recreate >>> base types. Look for a type equality tester that goes beyond just >>> testing >>> pointer equality. >>> >>> ie, in LTO I think we'll create a type based on the streamed data, but I >>> also think we'll create various basic types. Thus in LTO mode pointer >>> equality may not be sufficient. >> >> >> We make sure that for most basic types we end up re-using them where >> possible. >> char_type_node is an example where that generally doesn't work because >> it's >> value depends on a command-line flag. > > > That answers the first part of the question of why the sprintf > pass wouldn't run (or do anything) with -flto. With it fixed > (as in fold-const.c or tree-ssa-strlen.c as you suggested in > bug 79602) it runs and the optimization does its job, but no > warnings are issued. The wan_foo_flags for warnings that are > enabled implicitly (e.g., by -Wall or -Wextra on the command > line) are clear. There seem to be dependencies between warnings > in c.opt that ignore LTO (as a language), but even with those > corrected (i.e., with LTO added as a language to -Wformat and > -Wall) the flags are still clear when LTO runs. Does that ring > any bells for you? You can look at the lto_opts section (it's just a string) and see that we seem to fail to pass through -Wall (or any warning option I tried). This is because /* Also drop all options that are handled by the driver as well, which includes things like -o and -v or -fhelp for example. We do not need those. The only exception is -foffload option, if we write it in offload_lto section. Also drop all diagnostic options. */ if ((cl_options[option->opt_index].flags & (CL_DRIVER|CL_WARNING)) && (!lto_stream_offload_p || option->opt_index != OPT_foffload_)) continue; which means you have to explicitely enable diagnostics you want at link time at the moment. If you want to change that you have to do some changes to lto-wrapper.c as for example only pass through warning options that are set on all input files (warning options are not kept per function). Richard. > > Thanks > Martin
Re: [PATCH, testsuite]: Fix g++.dg/lto/pr79671 execute failure
On Tue, May 2, 2017 at 5:38 PM, Uros Bizjak wrote: > Hello! > > We have to match input and output operand (or use "+") of an empty asm > to move the value to a register. > > 2017-05-02 Uros Bizjak > > * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints. > > Tested on alphaev68-linux-gnu (where the unpatched testcase fails) and > x86_64-linux-gnu. > > OK for mailine? Ok. > > Uros.
Re: Alternative check for vector refs with same alignment
"Bin.Cheng" writes: > On Wed, May 3, 2017 at 11:07 AM, Richard Biener > wrote: >> On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford >> wrote: >>> vect_find_same_alignment_drs uses the ddr dependence distance >>> to tell whether two references have the same alignment. Although >>> that's safe with the current code, there's no particular reason >>> why a dependence distance of 0 should mean that the accesses start >>> on the same byte. E.g. a reference to a full complex value could >>> in principle depend on a reference to the imaginary component. >>> A later patch adds support for this kind of dependence. >>> >>> On the other side, checking modulo vf is pessimistic when the step >>> divided by the element size is a factor of 2. >>> >>> This patch instead looks for cases in which the drs have the same >>> base, offset and step, and for which the difference in their constant >>> initial values is a multiple of the alignment. >> >> I'm a bit wary about trusting operand_equal_p over dependence analysis. >> So, did you (can you) add an assert that the new code computes >> same alignment in all cases where the old one did? FWIW, the operand_equal_p for the base addresses is the same as the one used by the dependence analysis: /* If the references do not access the same object, we do not know whether they alias or not. We do not care about TBAA or alignment info so we can use OEP_ADDRESS_OF to avoid false negatives. But the accesses have to use compatible types as otherwise the built indices would not match. */ if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF) || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)), TREE_TYPE (DR_BASE_OBJECT (b { DDR_ARE_DEPENDENT (res) = chrec_dont_know; return res; } > At the moment operand_equal_p method looks more powerful than > dependence analysis, for example, it can handle the same memory > reference with pointer/array_ref forms like in PR65206. However, > given dependence check is not expensive here, is it possible to build > the patch on top of it when it fails? The old check isn't valid after my later patches, because there's no guarantee that the accesses start on the same byte. And like you say, the new check is more powerful in some ways (including the modulo vf thing I mentioned). So I'm not sure we can do anything useful with the dependence distance information. Sometimes it would give false positives and sometimes it would give false negatives. Thanks, Richard
Re: [PATCH, GCC/ARM, Stage 1] PR71607: Fix ICE when loading constant
On 20/04/17 10:54, Prakhar Bahuguna wrote: > [ARM] PR71607: Fix ICE when loading constant > > gcc/ChangeLog: > > 2017-04-18 Andre Vieira > Prakhar Bahuguna > > PR target/71607 > * config/arm/arm.md (use_literal_pool): Removes. > (64-bit immediate split): No longer takes cost into consideration > if 'arm_disable_literal_pool' is enabled. > * config/arm/arm.c (arm_tls_referenced_p): Add diagnostic if TLS is > used when arm_disable_literal_pool is enabled. > (arm_max_const_double_inline_cost): Remove use of > arm_disable_literal_pool. > (arm_reorg): Add return if arm_disable_literal_pool is enabled. > * config/arm/vfp.md (no_literal_pool_df_immediate): New. > (no_literal_pool_sf_immediate): New. > > testsuite/ChangeLog: > > 2017-04-18 Andre Vieira > Thomas Preud'homme > Prakhar Bahuguna > > PR target/71607 > * gcc.target/arm/thumb2-slow-flash-data.c: Renamed to ... > * gcc.target/arm/thumb2-slow-flash-data-1.c: ... this. > * gcc.target/arm/thumb2-slow-flash-data-2.c: New. > * gcc.target/arm/thumb2-slow-flash-data-3.c: New. > * gcc.target/arm/thumb2-slow-flash-data-4.c: New. > * gcc.target/arm/thumb2-slow-flash-data-5.c: New. > * gcc.target/arm/tls-disable-literal-pool.c: New. > > Okay for stage1? > This patch lacks a description of what's going on and why the change is necessary (it should stand alone from the PR data). It's clearly a non-trivial change, so why have you adopted this approach? R. > -- > > Prakhar Bahuguna > > > 0001-ARM-PR71607-Fix-ICE-when-loading-constant.patch > > > From 985100bbf8f168ab9a88ca29869453844eb6b58e Mon Sep 17 00:00:00 2001 > From: Prakhar Bahuguna > Date: Tue, 18 Apr 2017 14:16:46 +0100 > Subject: [PATCH] [ARM] PR71607: Fix ICE when loading constant > > --- > gcc/config/arm/arm.c | 20 +--- > gcc/config/arm/arm.md | 9 ++ > gcc/config/arm/vfp.md | 37 > ++ > ...low-flash-data.c => thumb2-slow-flash-data-1.c} | 0 > .../gcc.target/arm/thumb2-slow-flash-data-2.c | 28 > .../gcc.target/arm/thumb2-slow-flash-data-3.c | 25 +++ > .../gcc.target/arm/thumb2-slow-flash-data-4.c | 26 +++ > .../gcc.target/arm/thumb2-slow-flash-data-5.c | 14 > .../gcc.target/arm/tls-disable-literal-pool.c | 15 + > 9 files changed, 163 insertions(+), 11 deletions(-) > rename gcc/testsuite/gcc.target/arm/{thumb2-slow-flash-data.c => > thumb2-slow-flash-data-1.c} (100%) > create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c > create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c > create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c > create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c > create mode 100644 gcc/testsuite/gcc.target/arm/tls-disable-literal-pool.c > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index a2d80cfd645..01d8c52d8c5 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -8633,7 +8633,16 @@ arm_tls_referenced_p (rtx x) > { >const_rtx x = *iter; >if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) > - return true; > + { > + /* ARM currently does not provide relocations to encode TLS variables > + into AArch32 instructions, only data, so there is no way to > + currently implement these if a literal pool is disabled. */ > + if (arm_disable_literal_pool) > + sorry ("accessing thread-local storage is not currently supported " > +"with -mpure-code or -mslow-flash-data"); > + > + return true; > + } > >/* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are >TLS offsets, not real symbol references. */ > @@ -16391,10 +16400,6 @@ push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT > address, rtx *loc, > int > arm_max_const_double_inline_cost () > { > - /* Let the value get synthesized to avoid the use of literal pools. */ > - if (arm_disable_literal_pool) > -return 99; > - >return ((optimize_size || arm_ld_sched) ? 3 : 4); > } > > @@ -17341,6 +17346,11 @@ arm_reorg (void) >if (!optimize) > split_all_insns_noflow (); > > + /* Make sure we do not attempt to create a literal pool even though it > should > + no longer be necessary to create any. */ > + if (arm_disable_literal_pool) > +return ; > + >minipool_fix_head = minipool_fix_tail = NULL; > >/* The first insn must always be a note, or the code below won't > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > index 21cfe3a4c31..f9365cde504 100644 > --- a/gcc/config/arm/arm.md > +++ b/gcc/config/arm/arm.md > @@ -233,10 +233,6 @@ > (match_test
Re: [PATCH 2/5][GIMPLE FE] PR testsuite/80580: handle invalid types of "->" operands
On Mon, May 1, 2017 at 8:05 PM, Mikhail Maltsev wrote: > This bug happens when the LHS of operator '->' is either missing, i.e.: > > (->a) = 0; > > or it is not a pointer: > > int b; > b_2->c = 0; > > LHS should be validated. I think for the missing LHS it's better to simply not generate code when expr is in error state like with Index: gcc/c/gimple-parser.c === --- gcc/c/gimple-parser.c (revision 247542) +++ gcc/c/gimple-parser.c (working copy) @@ -968,6 +968,8 @@ c_parser_gimple_postfix_expression_after break; } + if (expr.value == error_mark_node) + break; start = expr.get_start (); finish = c_parser_tokens_buf (parser, 0)->location; expr.value = build_array_ref (op_loc, expr.value, idx); @@ -986,6 +988,8 @@ c_parser_gimple_postfix_expression_after c_parser_gimple_expr_list (parser, &exprlist); c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, "expected %<)%>"); + if (expr.value == error_mark_node) + break; expr.value = build_call_array_loc (expr_loc, TREE_TYPE (TREE_TYPE (expr.value)), expr.value, exprlist.length (), exprlist.address ()); @@ -1014,6 +1018,8 @@ c_parser_gimple_postfix_expression_after start = expr.get_start (); finish = c_parser_peek_token (parser)->get_finish (); c_parser_consume_token (parser); + if (expr.value == error_mark_node) + break; expr.value = build_component_ref (op_loc, expr.value, ident, comp_loc); set_c_expr_source_range (&expr, start, finish); @@ -1052,6 +1058,8 @@ c_parser_gimple_postfix_expression_after start = expr.get_start (); finish = c_parser_peek_token (parser)->get_finish (); c_parser_consume_token (parser); + if (expr.value == error_mark_node) + break; expr.value = build_component_ref (op_loc, build_simple_mem_ref_loc (op_loc, expr.value), it'll also get us some error recovery compared to stop parsing earlier. So maybe you can factor out the "missing op" case from this and the DOT case, doing the above? Then handle callers of build_simple_mem_ref_loc to add sanity checking that we've dereferencing a pointer. Thanks, Richard. > -- > Regards, >Mikhail Maltsev > > gcc/testsuite/ChangeLog: > > 2017-05-01 Mikhail Maltsev > > * gcc.dg/gimplefe-error-6.c: New test. > * gcc.dg/gimplefe-error-7.c: New test. > > > gcc/c/ChangeLog: > > 2017-05-01 Mikhail Maltsev > > * gimple-parser.c (c_parser_gimple_postfix_expression_after_primary): > Check LHS of operator '->' > >
Re: [PATCH 3/5][GIMPLE FE] PR testsuite/80580. Handle invalid unary "*" operand type
On Mon, May 1, 2017 at 8:07 PM, Mikhail Maltsev wrote: > This is essentially the same problem as in patch 2, but with unary '*'. We > should verify that its argument is a pointer. This is ok with being more specific like "expected pointer as argument of unary %<*%>" Thanks, Richard. > -- > Regards, >Mikhail Maltsev > > > gcc/c/ChangeLog: > > 2017-05-01 Mikhail Maltsev > > * gimple-parser.c (c_parser_gimple_unary_expression): Check argument > type of unary '*'. > > gcc/testsuite/ChangeLog: > > 2017-05-01 Mikhail Maltsev > > * gcc.dg/gimplefe-error-8.c: New test. > >
Re: [PATCH] canonical type hashing
On Tue, May 2, 2017 at 3:50 PM, Nathan Sidwell wrote: > On the modules branch, I need rematerialize canonical types and the like > from a read-in serialized tree. > > I discovered the canonical-type hash table is fed a bespoke hash value by > each type creator. There was no generic type hasher :( The rationale > appears to allow each type constuctor to just specialize its needs. > Excitingly, a generic type hasher is hiding inside > build_type_attribute_qual_variant. So I broke it out of there and > generalized it a bit more. > > The type hashers had diverged from the attribute-variant hasher. This is > not an error, because the attribute variant version was creating variants > with non-null attributes, so guaranteed different But it was confusing. > > This generic hasher is slightly different to the bespoke hashers in a few > places. One place of note was the vector type hasher, which mixed {elt-type, > num-elts, vector-mode}, but vector-mode is entirely determined by the first > two object, so mixing it in doesn't add any entropy. I dropped the mode. > > I still kept generating the hashvalue separate to the type_hash_canon call > itself. Perhaps a future patch could change that, but I didn't want to much > churn in this patch. > > I've included Jakub's recent TYPE_TYPELESS_STORAGE changes. (And notice that > the attribute-type hasher wasn't dealing with it.) > > booted and tested on x86_64-linux-gnu, ok? Looks ok to me. The new function name 'type_hash_default' is a little odd, maybe name it type_hash_canon_hash instead? Thanks, Richard. > nathan > -- > Nathan Sidwell
Re: [PATCH] ggc-page loop
On 05/03/2017 05:54 AM, Richard Biener wrote: He does, albeit in a coding way I dislike (watch how the iterator update is done in the condition!) Sorry you dislike that. I've used that kind of loop idiom, since for ever though. nathan -- Nathan Sidwell
Re: [PATCH] canonical type hashing
On 05/03/2017 07:32 AM, Richard Biener wrote: On Tue, May 2, 2017 at 3:50 PM, Nathan Sidwell wrote: Looks ok to me. The new function name 'type_hash_default' is a little odd, maybe name it type_hash_canon_hash instead? sure. I think when I originally broke it out, I didn't know it would end up replacing all the uses :) nathan -- Nathan Sidwell
[C++ PATCH] name lookup cleanup
I'm starting to push my name lookup cleanups from the modules branch. This first patch simply reorders existing decls in cp-tree.h to before the name-lookup #include. It'll allow moving some more things into the cp_global_trees array. The thrust of the name-lookup cleanup will remove a whole bunch of O(N^2) algorithms, in addition to making name-lookup.c itself less of a maze of twisty forwarding functions. I'm not pushing any module-specific bits at this point. nathan -- Nathan Sidwell 2017-05-03 Nathan Sidwell * cp-tree.h (enum cp_tree_index, cp_global_trees): Move earlier, along with #defines, to before name-lookup include. Index: cp-tree.h === --- cp-tree.h (revision 247529) +++ cp-tree.h (working copy) @@ -102,6 +102,182 @@ operator == (const cp_expr &lhs, tree rh return lhs.get_value () == rhs; } + +enum cp_tree_index +{ +CPTI_WCHAR_DECL, +CPTI_VTABLE_ENTRY_TYPE, +CPTI_DELTA_TYPE, +CPTI_VTABLE_INDEX_TYPE, +CPTI_CLEANUP_TYPE, +CPTI_VTT_PARM_TYPE, + +CPTI_CLASS_TYPE, +CPTI_UNKNOWN_TYPE, +CPTI_INIT_LIST_TYPE, +CPTI_VTBL_TYPE, +CPTI_VTBL_PTR_TYPE, +CPTI_STD, +CPTI_ABI, +CPTI_CONST_TYPE_INFO_TYPE, +CPTI_TYPE_INFO_PTR_TYPE, +CPTI_ABORT_FNDECL, +CPTI_AGGR_TAG, + +CPTI_CTOR_IDENTIFIER, +CPTI_COMPLETE_CTOR_IDENTIFIER, +CPTI_BASE_CTOR_IDENTIFIER, +CPTI_DTOR_IDENTIFIER, +CPTI_COMPLETE_DTOR_IDENTIFIER, +CPTI_BASE_DTOR_IDENTIFIER, +CPTI_DELETING_DTOR_IDENTIFIER, +CPTI_DELTA_IDENTIFIER, +CPTI_IN_CHARGE_IDENTIFIER, +CPTI_VTT_PARM_IDENTIFIER, +CPTI_NELTS_IDENTIFIER, +CPTI_THIS_IDENTIFIER, +CPTI_PFN_IDENTIFIER, +CPTI_VPTR_IDENTIFIER, +CPTI_STD_IDENTIFIER, +CPTI_AUTO_IDENTIFIER, +CPTI_DECLTYPE_AUTO_IDENTIFIER, + +CPTI_LANG_NAME_C, +CPTI_LANG_NAME_CPLUSPLUS, + +CPTI_EMPTY_EXCEPT_SPEC, +CPTI_NOEXCEPT_TRUE_SPEC, +CPTI_NOEXCEPT_FALSE_SPEC, +CPTI_TERMINATE, +CPTI_CALL_UNEXPECTED, +CPTI_ATEXIT_FN_PTR_TYPE, +CPTI_ATEXIT, +CPTI_DSO_HANDLE, +CPTI_DCAST, + +CPTI_KEYED_CLASSES, + +CPTI_NULLPTR, +CPTI_NULLPTR_TYPE, + +CPTI_ALIGN_TYPE, + +CPTI_ANY_TARG, + +CPTI_MAX +}; + +extern GTY(()) tree cp_global_trees[CPTI_MAX]; + +#define wchar_decl_nodecp_global_trees[CPTI_WCHAR_DECL] +#define vtable_entry_type cp_global_trees[CPTI_VTABLE_ENTRY_TYPE] +/* The type used to represent an offset by which to adjust the `this' + pointer in pointer-to-member types. */ +#define delta_type_nodecp_global_trees[CPTI_DELTA_TYPE] +/* The type used to represent an index into the vtable. */ +#define vtable_index_type cp_global_trees[CPTI_VTABLE_INDEX_TYPE] + +#define class_type_nodecp_global_trees[CPTI_CLASS_TYPE] +#define unknown_type_node cp_global_trees[CPTI_UNKNOWN_TYPE] +#define init_list_type_nodecp_global_trees[CPTI_INIT_LIST_TYPE] +#define vtbl_type_node cp_global_trees[CPTI_VTBL_TYPE] +#define vtbl_ptr_type_node cp_global_trees[CPTI_VTBL_PTR_TYPE] +#define std_node cp_global_trees[CPTI_STD] +#define abi_node cp_global_trees[CPTI_ABI] +#define const_type_info_type_node cp_global_trees[CPTI_CONST_TYPE_INFO_TYPE] +#define type_info_ptr_type cp_global_trees[CPTI_TYPE_INFO_PTR_TYPE] +#define abort_fndecl cp_global_trees[CPTI_ABORT_FNDECL] +#define current_aggr cp_global_trees[CPTI_AGGR_TAG] +#define nullptr_node cp_global_trees[CPTI_NULLPTR] +#define nullptr_type_node cp_global_trees[CPTI_NULLPTR_TYPE] +/* std::align_val_t */ +#define align_type_nodecp_global_trees[CPTI_ALIGN_TYPE] + +/* We cache these tree nodes so as to call get_identifier less + frequently. */ + +/* The name of a constructor that takes an in-charge parameter to + decide whether or not to construct virtual base classes. */ +#define ctor_identifier cp_global_trees[CPTI_CTOR_IDENTIFIER] +/* The name of a constructor that constructs virtual base classes. */ +#define complete_ctor_identifier cp_global_trees[CPTI_COMPLETE_CTOR_IDENTIFIER] +/* The name of a constructor that does not construct virtual base classes. */ +#define base_ctor_identifier cp_global_trees[CPTI_BASE_CTOR_IDENTIFIER] +/* The name of a destructor that takes an in-charge parameter to + decide whether or not to destroy virtual base classes and whether + or not to delete the object. */ +#define dtor_identifier cp_global_trees[CPTI_DTOR_IDENTIFIER] +/* The name of a destructor that destroys virtual base classes. */ +#define complete_dtor_identifier cp_global_trees[CPTI_COMPLETE_DTOR_IDENTIFIER] +/* The name of a destructor that
[committed] New fix-it printer
The existing fix-it printer can lead to difficult-to-read output when fix-it hints are near each other. For example, in a recent patch to add fix-it hints to the C++ frontend's -Wold-style-cast, e.g. for: foo *f = (foo *)ptr->field; ^ the fix-it hints: replace the open paren with "const_cast<" replace the close paren with "> (" insert ")" after the "ptr->field" would be printed in this odd-looking way: foo *f = (foo *)ptr->field; ^ - const_cast< - > () class rich_location consolidates adjacent fix-it hints, which helps somewhat, but the underlying problem is that the existing printer simply walks through the list of hints printing them, starting newlines as necessary. This patch reimplements fix-it printing by introducing a planning stage: a new class line_corrections "plans" how to print the fix-it hints affecting a line, generating a vec of "correction" instances. Hints that are sufficiently close to each other are consolidated at this stage. This leads to the much more reasonable output for the above case: foo *f = (foo *)ptr->field; ^ - const_cast (ptr->field); where the 3 hints are consolidated into one "correction" at printing. Successfully bootstrapped®rtested on x86_64-pc-linux-gnu. Committed to trunk as r247548. gcc/ChangeLog: * diagnostic-show-locus.c (struct column_range): New struct. (get_affected_columns): New function. (get_printed_columns): New function. (struct correction): New struct. (correction::ensure_capacity): New function. (correction::ensure_terminated): New function. (struct line_corrections): New struct. (line_corrections::~line_corrections): New dtor. (line_corrections::add_hint): New function. (layout::print_trailing_fixits): Reimplement in terms of the new classes. (selftest::test_overlapped_fixit_printing): New function. (selftest::diagnostic_show_locus_c_tests): Call it. --- gcc/diagnostic-show-locus.c | 572 +--- 1 file changed, 534 insertions(+), 38 deletions(-) diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c index b91c9d5..f410a32 100644 --- a/gcc/diagnostic-show-locus.c +++ b/gcc/diagnostic-show-locus.c @@ -1242,6 +1242,295 @@ layout::annotation_line_showed_range_p (int line, int start_column, return false; } +/* Classes for printing trailing fix-it hints i.e. those that + don't add new lines. + + For insertion, these can look like: + + new_text + + For replacement, these can look like: + + - : underline showing affected range + new_text + + For deletion, these can look like: + + - : underline showing affected range + + This can become confusing if they overlap, and so we need + to do some preprocessing to decide what to print. + We use the list of fixit_hint instances affecting the line + to build a list of "correction" instances, and print the + latter. + + For example, consider a set of fix-its for converting + a C-style cast to a C++ const_cast. + + Given: + + ..0112233. + ..123456789012345678901234567890123456789. + foo *f = (foo *)ptr->field; + ^ + + and the fix-it hints: + - replace col 10 (the open paren) with "const_cast<" + - replace col 16 (the close paren) with "> (" + - insert ")" before col 27 + + then we would get odd-looking output: + + foo *f = (foo *)ptr->field; + ^ + - + const_cast< +- +> () + + It would be better to detect when fixit hints are going to + overlap (those that require new lines), and to consolidate + the printing of such fixits, giving something like: + + foo *f = (foo *)ptr->field; + ^ + - + const_cast (ptr->field) + + This works by detecting when the printing would overlap, and + effectively injecting no-op replace hints into the gaps between + such fix-its, so that the printing joins up. + + In the above example, the overlap of: + - replace col 10 (the open paren) with "const_cast<" + and: + - replace col 16 (the close paren) with "> (" + is fixed by injecting a no-op: + - replace cols 11-15 with themselves ("foo *") + and consolidating these, making: + - replace cols 10-16 with "const_cast<" + "foo *" + "> (" + i.e.: + - replace cols 10-16 with "const_cast (" + + This overlaps with the final fix-it hint: + - insert ")" before col 27 + and so we repeat the consolidation process, by injecting + a no-op: + - replace cols 17-26 with themselves ("ptr->field") + giving: + - replac
Re: [PATCH] Implement a warning for bogus sizeof(pointer) / sizeof(pointer[0])
On Tue, May 2, 2017 at 9:26 AM, Bernd Edlinger wrote: > On 05/01/17 17:54, Jason Merrill wrote: >> On Fri, Apr 28, 2017 at 1:05 PM, Bernd Edlinger >> wrote: >>> On 04/28/17 17:29, Martin Sebor wrote: On 04/28/2017 08:12 AM, Bernd Edlinger wrote: > > Do you want me to change the %qT format strings to %T ? Yes, with the surrounding %< and %> the nested directives should use the unquoted forms, otherwise the printer would end up quoting both the whole expression and the type operand. FWIW, to help avoid this mistake, I think this might be something for GCC -Wformat to warn on and the pretty-printer to detect (and ICE on). >>> >>> Ah, now I understand. That's pretty advanced. >>> >>> Here is the modified patch with correct quoting of the expression. >>> >>> Bootstrap and reg-testing on x86_64-pc-linux-gnu. >> >>> * cp-gimplify.c (cp_fold): Implement the -Wsizeof_pointer_div warning. >> >> I think this warning belongs in cp_build_binary_op rather than cp_fold. >> > > Done, as suggested. The pattern in that function is to treat all *_DIV_EXPR the same; I don't think we need to break that pattern with this patch. So please move the new code after the other DIV case labels. With that the C++ changes are OK. Jason
Re: [PATCH] Fix documentation and a ctor in gcov.c
On 05/02/2017 11:37 AM, Martin Liška wrote: On 04/28/2017 07:23 PM, Nathan Sidwell wrote: Write proper member initializers please. Hi. Done that, patch can bootstrap on ppc64le-redhat-linux and survives regression tests. I consider the patch as pre-approved. yes, thanks! -- Nathan Sidwell
[PATCH v2] C++: Add fix-it hints for -Wold-style-cast
On Thu, 2017-04-27 at 23:03 +0200, Marek Polacek wrote: > On Thu, Apr 27, 2017 at 05:10:24PM -0400, David Malcolm wrote: > > + /* First try const_cast. */ > > + trial = build_const_cast (dst_type, orig_expr, 0 /* complain > > */); > > + if (trial != error_mark_node) > > +return "const_cast"; > > + > > + /* If that fails, try static_cast. */ > > + trial = build_static_cast (dst_type, orig_expr, 0 /* complain > > */); > > + if (trial != error_mark_node) > > +return "static_cast"; > > + > > + /* Finally, try reinterpret_cast. */ > > + trial = build_reinterpret_cast (dst_type, orig_expr, 0 /* > > complain */); > > + if (trial != error_mark_node) > > +return "reinterpret_cast"; > > I think you'll want tf_none instead of 0 /* complain */ in these. > > Marek Thanks. Here's an updated version of the patch. Changes since v1: - updated expected fixit-formatting (the new fix-it printer in r247548 handles this properly now) - added new test cases as suggested by Florian - use "tf_none" rather than "0 /* complain */" Successfully bootstrapped®rtested on x86_64-pc-linux-gnu. OK for trunk? gcc/cp/ChangeLog: * parser.c (get_cast_suggestion): New function. (maybe_add_cast_fixit): New function. (cp_parser_cast_expression): Capture the location of the closing parenthesis. Call maybe_add_cast_fixit when emitting warnings about old-style casts. gcc/testsuite/ChangeLog: * g++.dg/other/old-style-cast-fixits.C: New test case. --- gcc/cp/parser.c| 93 - gcc/testsuite/g++.dg/other/old-style-cast-fixits.C | 95 ++ 2 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/other/old-style-cast-fixits.C diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 4714bc6..2f83aa9 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -8633,6 +8633,85 @@ cp_parser_tokens_start_cast_expression (cp_parser *parser) } } +/* Try to find a legal C++-style cast to DST_TYPE for ORIG_EXPR, trying them + in the order: const_cast, static_cast, reinterpret_cast. + + Don't suggest dynamic_cast. + + Return the first legal cast kind found, or NULL otherwise. */ + +static const char * +get_cast_suggestion (tree dst_type, tree orig_expr) +{ + tree trial; + + /* Reuse the parser logic by attempting to build the various kinds of + cast, with "complain" disabled. + Identify the first such cast that is valid. */ + + /* Don't attempt to run such logic within template processing. */ + if (processing_template_decl) +return NULL; + + /* First try const_cast. */ + trial = build_const_cast (dst_type, orig_expr, tf_none); + if (trial != error_mark_node) +return "const_cast"; + + /* If that fails, try static_cast. */ + trial = build_static_cast (dst_type, orig_expr, tf_none); + if (trial != error_mark_node) +return "static_cast"; + + /* Finally, try reinterpret_cast. */ + trial = build_reinterpret_cast (dst_type, orig_expr, tf_none); + if (trial != error_mark_node) +return "reinterpret_cast"; + + /* No such cast possible. */ + return NULL; +} + +/* If -Wold-style-cast is enabled, add fix-its to RICHLOC, + suggesting how to convert a C-style cast of the form: + + (DST_TYPE)ORIG_EXPR + + to a C++-style cast. + + The primary range of RICHLOC is asssumed to be that of the original + expression. OPEN_PAREN_LOC and CLOSE_PAREN_LOC give the locations + of the parens in the C-style cast. */ + +static void +maybe_add_cast_fixit (rich_location *rich_loc, location_t open_paren_loc, + location_t close_paren_loc, tree orig_expr, + tree dst_type) +{ + /* This function is non-trivial, so bail out now if the warning isn't + going to be emitted. */ + if (!warn_old_style_cast) +return; + + /* Try to find a legal C++ cast, trying them in order: + const_cast, static_cast, reinterpret_cast. */ + const char *cast_suggestion = get_cast_suggestion (dst_type, orig_expr); + if (!cast_suggestion) +return; + + /* Replace the open paren with "CAST_SUGGESTION<". */ + pretty_printer pp; + pp_printf (&pp, "%s<", cast_suggestion); + rich_loc->add_fixit_replace (open_paren_loc, pp_formatted_text (&pp)); + + /* Replace the close paren with "> (". */ + rich_loc->add_fixit_replace (close_paren_loc, "> ("); + + /* Add a closing paren after the expr (the primary range of RICH_LOC). */ + rich_loc->add_fixit_insert_after (")"); +} + + /* Parse a cast-expression. cast-expression: @@ -8668,6 +8747,7 @@ cp_parser_cast_expression (cp_parser *parser, bool address_p, bool cast_p, /* Consume the `('. */ cp_token *open_paren = cp_lexer_consume_token (parser->lexer); location_t open_paren_loc = open_paren->location; + location_t close_paren_loc = UNKNOWN_LOCATION; /* A very tricky bit is that `(struct S) { 3 }' is a compound
Re: [PATCH, v3] Fix PR51513, switch statement with default case containing __builtin_unreachable leads to wild branch
On Wed, 26 Apr 2017, Peter Bergner wrote: > On 4/20/17 8:26 AM, Peter Bergner wrote: > > On 4/20/17 2:37 AM, Richard Biener wrote: > >> Ok, so I think we should ensure that we remove the regular cases > >> with unreachable destination, like in > >> > >> switch (i) > >> { > >> case 0: > >> __builtin_unreachable (); > >> default:; > >> } > >> > >> and handle default with __builtin_unreachable () from switch > >> expansion by omitting the range check for the jump table > >> indexing (and choose a non-default case label for gaps). > > > > Ok, I'll modify optimize_unreachable() to remove the unreachable > > case statements, and leave the unreachable default case statement > > for switch expansion so we can eliminate the range check. Thanks. > > Ok, I lied. :-) It ended up being a fair amount of code to remove > the unreachable case statements within optimize_unreachable(). > Instead, I hooked into group_case_labels_stmt() which is much > simpler! That code eliminates all case statements that have the > same destination as the default case statement. With the patch, > we also eliminate case statements that are unreachable, thus > treating them like default cases. This has the added benefit of > being able to reduce the size of the dispatch table, if those cases > were at the end of the dispatch table entries. > > One difference from the last patch is that I am no longer setting > default_label to NULL when we emit a decision tree. I noticed that > the decision tree code seemed to generate slightly better code for > some of my unit tests if I left it alone. This simplified the > patch somewhat by removing the changes to emit_case_nodes(). > > This passed bootstrap and regtesting on powerpc64le-linux and > x86_64-linux. Is this ok for trunk now? If so, I can hold off > committing it until GCC 7 has been released if that helps. Can you do the gimple_unreachable_bb_p check earlier in expand_case so it covers the emit_case_decision_tree path as well (and verify that works, of course)? So basically right at /* Find the default case target label. */ default_label = jump_target_rtx (CASE_LABEL (gimple_switch_default_label (stmt))); edge default_edge = EDGE_SUCC (bb, 0); int default_prob = default_edge->probability; handle this case. As for Bernhards concern I share this -- please intead make the interface take either a gimple_seq or a gimple_stmt_iterator instead of a basic-block. That makes it more obvious you can't use things like gsi_after_labels. Also I think it's more natural to work backwards as the last stmt in the sequence _has_ to be __builtin_unreachable () and thus checking that first is the cheapest thing to do given that in most cases it will not be __builtin_unreachable () (but a noreturn call or an inifinite loop). Thus, name it gimple_seq_unreachable_p. Thanks, Richard. > Peter > > > gcc/ > * tree-cfg.c (gimple_unreachable_bb_p): New function. > (assert_unreachable_fallthru_edge_p): Use it. > (group_case_labels_stmt): Likewise. > * tree-cfg.h: Prototype it. > * stmt.c: Include cfghooks.h and tree-cfg.h. > (emit_case_dispatch_table) : New local variable. > Use it to fill dispatch table gaps. > Test for default_label before updating probabilities. > (expand_case) : Remove unneeded initialization. > Test for unreachable default case statement and remove its edge. > Set default_label accordingly. > * gcc/tree-ssa-ccp.c (optimize_unreachable): Update comment. > > gcc/testsuite/ > * gcc.target/powerpc/pr51513.c: New test. > * gcc.dg/predict-13.c: Replace __builtin_unreachable() with > __builtin_abort(). > * gcc.dg/predict-14.c: Likewise. > > Index: gcc/tree-cfg.c > === > --- gcc/tree-cfg.c(revision 247291) > +++ gcc/tree-cfg.c(working copy) > @@ -452,6 +452,37 @@ computed_goto_p (gimple *t) > && TREE_CODE (gimple_goto_dest (t)) != LABEL_DECL); > } > > +/* Returns true if the basic block BB has no successors and only contains > + a call to __builtin_unreachable (). */ > + > +bool > +gimple_unreachable_bb_p (basic_block bb) > +{ > + gimple_stmt_iterator gsi; > + gimple_seq stmts = bb_seq (bb); > + gimple *stmt; > + > + if (EDGE_COUNT (bb->succs) != 0) > +return false; > + > + gsi = gsi_start (stmts); > + while (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL) > +gsi_next (&gsi); > + > + if (gsi_end_p (gsi)) > +return false; > + > + stmt = gsi_stmt (gsi); > + while (is_gimple_debug (stmt) || gimple_clobber_p (stmt)) > +{ > + gsi_next (&gsi); > + if (gsi_end_p (gsi)) > + return false; > + stmt = gsi_stmt (gsi); > +} > + return gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE); > +} > + > /* Returns true for edge E where e->src ends with a GIMPLE_COND and > the other edge points to a bb with just __builtin_unreachable ().
Re: [PATCH GCC8][17/33]Treat complex cand step as invriant expression
On Tue, Apr 18, 2017 at 12:46 PM, Bin Cheng wrote: > Hi, > We generally need to compute cand step in loop preheader and use it in loop > body. > Unless it's an SSA_NAME of constant integer, an invariant expression is > needed. I'm confused as to what this patch does. Comments talk about "Handle step as" but then we print "Depend on inv...". And we share bitmaps, well it seems + find_inv_vars (data, &step, &cand->inv_vars); + + iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step); + /* Share bitmap between inv_vars and inv_exprs for cand. */ + if (inv_expr != NULL) + { + cand->inv_exprs = cand->inv_vars; + cand->inv_vars = NULL; + if (cand->inv_exprs) + bitmap_clear (cand->inv_exprs); + else + cand->inv_exprs = BITMAP_ALLOC (NULL); + + bitmap_set_bit (cand->inv_exprs, inv_expr->id); just shares the bitmap allocation (and leaks cand->inv_exprs?). Note that generally it might be cheaper to use bitmap_head instead of bitmap in the various structures (and then bitmap_initialize ()), this saves one indirection. Anyway, the relation between inv_vars and inv_exprs is what confuses me. Maybe it's the same as for cost_pair? invariants vs. loop invariants? whatever that means... That is, can you expand the comments in cost_pair / iv_cand for inv_vars vs. inv_exprs, esp what "invariant" actually means? Thanks, Richard. > Thanks, > bin > > 2017-04-11 Bin Cheng > > * tree-ssa-loop-ivopts.c (struct iv_cand): New field inv_exprs. > (dump_cand): Support iv_cand.inv_exprs. > (add_candidate_1): Record invariant exprs in iv_cand.inv_exprs > for candidates. > (iv_ca_set_no_cp, iv_ca_set_cp, free_loop_data): Support > iv_cand.inv_exprs.
Re: Update ipa-cp to new time metrics
Hi, On Tue, May 02, 2017 at 11:33:28AM +0200, Jan Hubicka wrote: > Hi, > this patch makes ipa-cp to use nonspecialized time as a base for decision > about > cloning. I wonder about the capping - we perhaps want to use sreals further > in > the code because time differences can be large (with profile feedback). But I > guess this can be done incrementally - main point of the patch is to update > interfaces from ipa-analysis. > > Bootstrapped/regtested x86_64-linux, OK? Yes it is, thanks. Martin > > Honza > > * ipa-cp.c (perform_estimation_of_a_value): Drop base_time parameter; > update use of estimate_ipcp_clone_size_and_time. > (estimate_local_effects): Update use of > estimate_ipcp_clone_size_and_time and perform_estimation_of_a_value. > * ipa-inline.h (estimate_ipcp_clone_size_and_time): Update prototype. > * ipa-inline-analysis.c (estimate_ipcp_clone_size_and_time): > Return nonspecialized time.
Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)
Hi, On 29 April 2017 at 19:56, Andreas Schwab wrote: > On Apr 28 2017, Martin Sebor wrote: > >> +void test_width_and_precision_out_of_range (char *d) >> +{ >> +#if __LONG_MAX__ == 2147483647 >> +# define MAX_P1_STR "2147483648" >> +#elif __LONG_MAX__ == 9223372036854775807 >> +# define MAX_P1_STR "9223372036854775808" >> +#endif >> + >> + T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" } */ >> + /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1 } */ >> + T ("%." MAX_P1_STR "i", 0); /* { dg-warning "precision out of range" } >> */ > > FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line 123) > FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line 125) > FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors) > Excess errors: > /daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3: > warning: '%.2147483648i' directive output of 2147483648 bytes causes result > to exceed 'INT_MAX' [-Wformat-overflow=] > > Andreas. I've noticed the same errors on arm* targets, if it's easier to reproduce. Thanks, Christophe > > -- > Andreas Schwab, sch...@linux-m68k.org > GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 > "And now for something completely different."
Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost
Hi Bin, On 3 May 2017 at 12:12, Bin.Cheng wrote: > On Wed, May 3, 2017 at 11:09 AM, Kyrill Tkachov > wrote: >> Hi Bin, >> >> >> On 03/05/17 11:02, Bin.Cheng wrote: >>> >>> On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng wrote: On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou wrote: >> >> 2017-04-11 Bin Cheng >> >>* rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. > > This breaks bootstrap with RTL checking: > > /home/eric/build/gcc/native/./gcc/xgcc > -B/home/eric/build/gcc/native/./gcc/ - > nostdinc -x c /dev/null -S -o /dev/null -fself- > test=/home/eric/svn/gcc/gcc/testsuite/selftests > cc1: internal compiler error: RTL check: expected code 'subreg', have > 'truncate' in rtx_cost, at rtlanal.c:4169 > 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, > int, > char const*) > /home/eric/svn/gcc/gcc/rtl.c:829 > 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool) > /home/eric/svn/gcc/gcc/rtlanal.c:4169 > 0x8517e6 set_src_cost > /home/eric/svn/gcc/gcc/rtl.h:2685 > 0x8517e6 init_expmed_one_conv > /home/eric/svn/gcc/gcc/expmed.c:142 > 0x8517e6 init_expmed_one_mode > /home/eric/svn/gcc/gcc/expmed.c:209 > 0x853fb2 init_expmed() > /home/eric/svn/gcc/gcc/expmed.c:270 > 0xc45974 backend_init_target > /home/eric/svn/gcc/gcc/toplev.c:1665 > 0xc45974 initialize_rtl() > Sorry for disturbing, I will revert this if can't fix today. >>> >>> It looks bogus and I couldn't find the motivating case for it, so >>> revert with attached patch. Build on x86 and commit as obvious. >>> >>> Thanks, >>> bin >>> 2017-05-03 Bin Cheng >>> >>> Revert >>> 2017-05-02 Bin Cheng >>> * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes. >> >> >> Looking at the code in the patch... >> >> +case TRUNCATE: >> + /* If we can tie these modes, make this cheap. */ >> + if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x >> >> 'code' here is GET_CODE (x) and in this case it is TRUNCATE. >> SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so >> passing it a TRUNCATE rtx would cause >> the checking failure Eric reported. I think you meant to use XEXP (x, 0) >> instead of SUBREG_REG (x) ? > Yes, I guess so. Reverted since I couldn't find the original test. > If it helps, your patch also introduced regressions on some arm targets. See: http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247509/report-build-info.html gcc.c-torture/execute/pr53645-2.c -O2 (test for excess errors) gcc.c-torture/execute/pr53645-2.c -O2 -flto -fno-use-linker-plugin -flto-partition=none (test for excess errors) gcc.c-torture/execute/pr53645-2.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (test for excess errors) gcc.c-torture/execute/pr53645-2.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions (test for excess errors) gcc.c-torture/execute/pr53645-2.c -O3 -g (test for excess errors) Thanks, Christophe > Thanks, > bin >> >> Thanks, >> Kyrill >>
Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling
Hi Bin, On 24 April 2017 at 12:26, Richard Biener wrote: > On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng wrote: >> Hi, >> This patch refactors how invariant variable/expressions are handled. Now >> they are >> recorded in the same kind data structure and handled similarly, which makes >> code >> easier to understand. >> >> Is it OK? > This patch (r247512) caused regression on some arm targets/cpu/fpu/runtestflags combinations: FAIL:gcc.target/arm/ivopts.c object-size text <= 32 See: http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html Thanks, Christophe > Ok. > > Richard. > >> Thanks, >> bin >> >> 2017-04-11 Bin Cheng >> >> * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to >> inv_vars. Add inv_exprs. >> (struct iv_cand): Rename depends_on to inv_vars. >> (struct ivopts_data): Rename max_inv_id/n_invariant_uses to >> max_inv_var_id/n_inv_var_uses. Move max_inv_expr_id around. >> Refactor field used_inv_exprs from has_map to array n_inv_expr_uses. >> (dump_cand): Dump inv_vars. >> (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs. >> (record_invariant, find_depends, add_candidate_1): Ditto. >> (set_group_iv_cost, force_var_cost): Ditto. >> (split_address_cost, ptr_difference_cost, difference_cost): Ditto. >> (get_computation_cost_at, get_computation_cost): Ditto. >> (determine_group_iv_cost_generic): Ditto. >> (determine_group_iv_cost_address): Ditto. >> (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto. >> (determine_group_iv_costs): Ditto. >> (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size. >> (iv_ca_set_remove_invariants): Renamed to ... >> (iv_ca_set_remove_invs): ... this. Support inv_vars and inv_exprs. >> (iv_ca_set_no_cp): Use iv_ca_set_remove_invs. >> (iv_ca_set_add_invariants): Renamed to ... >> (iv_ca_set_add_invs): ... this. Support inv_vars and inv_exprs. >> (iv_ca_set_cp): Use iv_ca_set_add_invs. >> (iv_ca_has_deps): Support inv_vars and inv_exprs. >> (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto. >> (create_new_ivs): Remove useless dump. >> >> gcc/testsuite/ChangeLog >> 2017-04-11 Bin Cheng >> >> * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.
Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling
On Wed, May 3, 2017 at 3:38 PM, Christophe Lyon wrote: > Hi Bin, > > > On 24 April 2017 at 12:26, Richard Biener wrote: >> On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng wrote: >>> Hi, >>> This patch refactors how invariant variable/expressions are handled. Now >>> they are >>> recorded in the same kind data structure and handled similarly, which makes >>> code >>> easier to understand. >>> >>> Is it OK? >> > > This patch (r247512) caused regression on some arm > targets/cpu/fpu/runtestflags combinations: > FAIL:gcc.target/arm/ivopts.c object-size text <= 32 > > See: > http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html Hi Christophe, Thanks very much for reporting. Seems it's more than pure refactoring. I will investigate it. Thanks, bin > > Thanks, > > Christophe > > >> Ok. >> >> Richard. >> >>> Thanks, >>> bin >>> >>> 2017-04-11 Bin Cheng >>> >>> * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to >>> inv_vars. Add inv_exprs. >>> (struct iv_cand): Rename depends_on to inv_vars. >>> (struct ivopts_data): Rename max_inv_id/n_invariant_uses to >>> max_inv_var_id/n_inv_var_uses. Move max_inv_expr_id around. >>> Refactor field used_inv_exprs from has_map to array n_inv_expr_uses. >>> (dump_cand): Dump inv_vars. >>> (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs. >>> (record_invariant, find_depends, add_candidate_1): Ditto. >>> (set_group_iv_cost, force_var_cost): Ditto. >>> (split_address_cost, ptr_difference_cost, difference_cost): Ditto. >>> (get_computation_cost_at, get_computation_cost): Ditto. >>> (determine_group_iv_cost_generic): Ditto. >>> (determine_group_iv_cost_address): Ditto. >>> (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto. >>> (determine_group_iv_costs): Ditto. >>> (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size. >>> (iv_ca_set_remove_invariants): Renamed to ... >>> (iv_ca_set_remove_invs): ... this. Support inv_vars and inv_exprs. >>> (iv_ca_set_no_cp): Use iv_ca_set_remove_invs. >>> (iv_ca_set_add_invariants): Renamed to ... >>> (iv_ca_set_add_invs): ... this. Support inv_vars and inv_exprs. >>> (iv_ca_set_cp): Use iv_ca_set_add_invs. >>> (iv_ca_has_deps): Support inv_vars and inv_exprs. >>> (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto. >>> (create_new_ivs): Remove useless dump. >>> >>> gcc/testsuite/ChangeLog >>> 2017-04-11 Bin Cheng >>> >>> * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.
Re: [PATCH] handle sprintf(d, "%s", ...) in gimple-ssa-sprintf.c
On 05/03/2017 04:20 AM, Richard Biener wrote: On Tue, May 2, 2017 at 4:41 PM, Martin Sebor wrote: FWIW, my fix for bug 79062 is only partial (it gets the pass to run but the warnings are still not issued). I don't quite understand what prevents the warning flag(s) from getting set when -flto is used. This seems to be a bigger problem than just the sprintf pass not doing something just right. I've never dug deeply in the LTO stuff, but I believe we stream the compiler flags, so it could be something there. We do. Alternately you might be running into a case where in LTO mode we recreate base types. Look for a type equality tester that goes beyond just testing pointer equality. ie, in LTO I think we'll create a type based on the streamed data, but I also think we'll create various basic types. Thus in LTO mode pointer equality may not be sufficient. We make sure that for most basic types we end up re-using them where possible. char_type_node is an example where that generally doesn't work because it's value depends on a command-line flag. That answers the first part of the question of why the sprintf pass wouldn't run (or do anything) with -flto. With it fixed (as in fold-const.c or tree-ssa-strlen.c as you suggested in bug 79602) it runs and the optimization does its job, but no warnings are issued. The wan_foo_flags for warnings that are enabled implicitly (e.g., by -Wall or -Wextra on the command line) are clear. There seem to be dependencies between warnings in c.opt that ignore LTO (as a language), but even with those corrected (i.e., with LTO added as a language to -Wformat and -Wall) the flags are still clear when LTO runs. Does that ring any bells for you? You can look at the lto_opts section (it's just a string) and see that we seem to fail to pass through -Wall (or any warning option I tried). This is because /* Also drop all options that are handled by the driver as well, which includes things like -o and -v or -fhelp for example. We do not need those. The only exception is -foffload option, if we write it in offload_lto section. Also drop all diagnostic options. */ if ((cl_options[option->opt_index].flags & (CL_DRIVER|CL_WARNING)) && (!lto_stream_offload_p || option->opt_index != OPT_foffload_)) continue; which means you have to explicitely enable diagnostics you want at link time at the moment. If you want to change that you have to do some changes to lto-wrapper.c as for example only pass through warning options that are set on all input files (warning options are not kept per function). Great, thanks for the pointer! I might tackle that at some point. Jeff, given that we now understand why the warnings are suppressed (i.e., the root cause of the rest of bug 79062), are you okay with treating that independently of this PR (bug 77671) and committing this patch (including the two-line fix to enable the sprintf return value optimization with LTO)? Martin
[PATCH v8] add -fpatchable-function-entry=N,M option
On Wed, Mar 01, 2017 at 01:35:52PM +, Richard Earnshaw (lists) wrote: > >> > >> How about --fpatchable-function-entry=? > > > I haven't reviewed it yet. I'm not really planning to spend any more > time on this until stage1 re-opens. So I guess this is about now? Here is version 8, which is functionally identical to v6 (v7 tried to guard the gen_nop call, which you wrote isn't neccessary). The longer names required some reformatting. Torsten gcc/c-family/ChangeLog 2017-05-03 Torsten Duwe * c-attribs.c (c_common_attribute_table): Add entry for "patchable_function_entry". gcc/lto/ChangeLog 2017-05-03 Torsten Duwe * lto-lang.c (lto_attribute_table): Add entry for "patchable_function_entry". gcc/ChangeLog 2017-05-03 Torsten Duwe * common.opt: Introduce -fpatchable-function-entry command line option, and its variables function_entry_patch_area_size and function_entry_patch_area_start. * opts.c (common_handle_option): Add -fpatchable_function_entry_ case, including a two-value parser. * target.def (print_patchable_function_entry): New target hook. * targhooks.h (default_print_patchable_function_entry): New function. * targhooks.c (default_print_patchable_function_entry): Likewise. * toplev.c (process_options): Switch off IPA-RA if patchable function entries are being generated. * varasm.c (assemble_start_function): Look at the patchable-function-entry command line switch and current function attributes and maybe generate NOP instructions by calling the print_patchable_function_entry hook. * doc/extend.texi: Document patchable_function_entry attribute. * doc/invoke.texi: Document -fpatchable_function_entry command line option. * doc/tm.texi.in (TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY): New target hook. * doc/tm.texi: Likewise. gcc/testsuite/ChangeLog 2017-05-03 Torsten Duwe * c-c++-common/attribute-patchable_function_entry-1.c: New test. diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c index f2a88e147ba..31137ce0433 100644 --- a/gcc/c-family/c-attribs.c +++ b/gcc/c-family/c-attribs.c @@ -139,6 +139,8 @@ static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *) static tree handle_bnd_legacy (tree *, tree, tree, int, bool *); static tree handle_bnd_instrument (tree *, tree, tree, int, bool *); static tree handle_fallthrough_attribute (tree *, tree, tree, int, bool *); +static tree handle_patchable_function_entry_attribute (tree *, tree, tree, + int, bool *); /* Table of machine-independent attributes common to all C-like languages. @@ -345,6 +347,9 @@ const struct attribute_spec c_common_attribute_table[] = handle_bnd_instrument, false }, { "fallthrough", 0, 0, false, false, false, handle_fallthrough_attribute, false }, + { "patchable_function_entry",1, 2, true, false, false, + handle_patchable_function_entry_attribute, + false }, { NULL, 0, 0, false, false, false, NULL, false } }; @@ -3173,3 +3178,10 @@ handle_fallthrough_attribute (tree *, tree name, tree, int, *no_add_attrs = true; return NULL_TREE; } + +static tree +handle_patchable_function_entry_attribute (tree *, tree, tree, int, bool *) +{ + /* Nothing to be done here. */ + return NULL_TREE; +} diff --git a/gcc/common.opt b/gcc/common.opt index b7ece0c73e1..1b698ef4fc5 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -163,6 +163,13 @@ bool flag_stack_usage_info = false Variable int flag_debug_asm +; How many NOP insns to place at each function entry by default +Variable +HOST_WIDE_INT function_entry_patch_area_size + +; And how far the real asm entry point is into this area +Variable +HOST_WIDE_INT function_entry_patch_area_start ; Balance between GNAT encodings and standard DWARF to emit. Variable @@ -2022,6 +2029,10 @@ fprofile-reorder-functions Common Report Var(flag_profile_reorder_functions) Enable function reordering that improves code placement. +fpatchable-function-entry= +Common Joined Optimization +Insert NOP instructions at each function entry. + frandom-seed Common Var(common_deferred_options) Defer diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 1255995eb78..d09ccd90c42 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3083,6 +3083,25 @@ that affect more than one function. This attribute should be used for debugging purposes only. It is not suitable in production code. +@item patchable_function_entry +@cindex @code{patchable_function_entry} function attribute +@cindex extra NOP instructions at the function entry point +In case the target's text segment can be made writable at run time by +an
Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)
On 05/03/2017 08:22 AM, Christophe Lyon wrote: Hi, On 29 April 2017 at 19:56, Andreas Schwab wrote: On Apr 28 2017, Martin Sebor wrote: +void test_width_and_precision_out_of_range (char *d) +{ +#if __LONG_MAX__ == 2147483647 +# define MAX_P1_STR "2147483648" +#elif __LONG_MAX__ == 9223372036854775807 +# define MAX_P1_STR "9223372036854775808" +#endif + + T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" } */ + /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1 } */ + T ("%." MAX_P1_STR "i", 0); /* { dg-warning "precision out of range" } */ FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line 123) FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line 125) FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors) Excess errors: /daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3: warning: '%.2147483648i' directive output of 2147483648 bytes causes result to exceed 'INT_MAX' [-Wformat-overflow=] Andreas. I've noticed the same errors on arm* targets, if it's easier to reproduce. Thanks. I committed a trivial fix for this on Monday (https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00036.html). I don't see the failures in recent test results for the few ILP32 targets I've checked so I'm hoping they're gone but if they persist on some others please let me know. Martin
Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)
On 3 May 2017 at 16:54, Martin Sebor wrote: > On 05/03/2017 08:22 AM, Christophe Lyon wrote: >> >> Hi, >> >> >> On 29 April 2017 at 19:56, Andreas Schwab wrote: >>> >>> On Apr 28 2017, Martin Sebor wrote: >>> +void test_width_and_precision_out_of_range (char *d) +{ +#if __LONG_MAX__ == 2147483647 +# define MAX_P1_STR "2147483648" +#elif __LONG_MAX__ == 9223372036854775807 +# define MAX_P1_STR "9223372036854775808" +#endif + + T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" } */ + /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1 } */ + T ("%." MAX_P1_STR "i", 0); /* { dg-warning "precision out of range" } */ >>> >>> >>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line >>> 123) >>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for warnings, line >>> 125) >>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors) >>> Excess errors: >>> >>> /daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3: >>> warning: '%.2147483648i' directive output of 2147483648 bytes causes result >>> to exceed 'INT_MAX' [-Wformat-overflow=] >>> >>> Andreas. >> >> >> I've noticed the same errors on arm* targets, if it's easier to reproduce. > > > Thanks. I committed a trivial fix for this on Monday > (https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00036.html). > I don't see the failures in recent test results for the few > ILP32 targets I've checked so I'm hoping they're gone but if > they persist on some others please let me know. > Indeed, I confirm your commit r247444 fixed the error. I didn't notice your message because it was in a different thread. Thanks, Christophe > Martin
[PATCH] Remove am33_2.0-linux from config-list.mk
am33_2.0-linux was removed from glibc some time ago. Furthermore, for the testing done by config-list.mk builds, we get almost identical coverage from mn10300-elf. In fact, mn10300-elf is actually better for testing purposes -- we can build newlib to do a first level test of the code generator and we can run the testsuite with the mn103 simulator if we were so inclined. We can't do either with the -linux configuration. This patch removes am33_2.0-linux from config-list.mk. One could argue the port should be deprecated & removed. I'm not sure if that would affect linux kernel builds which still supports the mn103 platform, so I'm leaving it as-is for now. Installing on the trunk, Jeff diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 655e7fb7b84..c26ced0eeaa 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,8 @@ +2007-05-03 Jeff Law + + * config-list.mk (am33_2.0-linux): Remove from list of targets + to build. + 2017-05-02 Jakub Jelinek * gennews (files): Add files for GCC 7. diff --git a/contrib/config-list.mk b/contrib/config-list.mk index 0edc8a4b813..fee08b02a2b 100644 --- a/contrib/config-list.mk +++ b/contrib/config-list.mk @@ -32,7 +32,7 @@ GCC_SRC_DIR=../../gcc # v850e1-elf is rejected by config.sub LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \ alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \ - alpha64-dec-vms alpha-dec-vms am33_2.0-linux \ + alpha64-dec-vms alpha-dec-vms \ arc-elf32OPT-with-cpu=arc600 arc-elf32OPT-with-cpu=arc700 \ arc-linux-uclibcOPT-with-cpu=arc700 arceb-linux-uclibcOPT-with-cpu=arc700 \ arm-wrs-vxworks arm-netbsdelf \
Re: [PATCH 2/7] clean up quoting problems - gcc (PR 80280 et al.)
On 05/02/2017 08:37 PM, Martin Sebor wrote: The attached patch adds missing quoting to diagnostic directives in files in the top-level gcc/ directory. Martin gcc-80280-gcc.diff gcc/ChangeLog: * builtins.c (expand_builtin_object_size): Add missing quoting to %D and like directives. * hsa-gen.c (hsa_type_for_scalar_tree_type): Same. (hsa_type_for_tree_type): Same. (verify_function_arguments): Same. * symtab.c (symbol_table::change_decl_assembler_name): Same. * varasm.c (get_section): Same. (mark_weak): Same. OK. In fact, go ahead and consider all changes of this nature pre-approved. I think that covers #2-#7 in this series. Jeff
Re: [PATCH v2] Generate reproducible output independently of the build-path
Joseph Myers: > On Tue, 11 Apr 2017, Ximin Luo wrote: > >> Copyright disclaimer >> >> >> I dedicate these patches to the public domain by waiving all of my rights to >> the work worldwide under copyright law, including all related and neighboring >> rights, to the extent allowed by law. >> >> See https://creativecommons.org/publicdomain/zero/1.0/legalcode for full >> text. >> >> Please let me know if the above is insufficient and I will be happy to sign >> any >> relevant forms. > > I believe the FSF wants its own disclaimer forms signed as evidence code > is in the public domain. The process for getting disclaimer forms is to > complete > https://git.savannah.gnu.org/cgit/gnulib.git/plain/doc/Copyright/request-disclaim.changes > > and then you should be sent a disclaimer form for disclaiming the > particular set of changes you have completed (if you then make further > significant changes afterwards, the disclaimer form would then need > completing for them as well). > I've now done this, and the copyright clerk at the FSF has told me that this is complete on their side as well. Did any of you get a chance to look at the patch yet? X -- GPG: ed25519/56034877E1F87C35 GPG: rsa4096/1318EFAC5FBBDBCE https://github.com/infinity0/pubkeys.git
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On Tue, 2 May 2017, Martin Sebor wrote: > In bug 80280 - Missing closing quote (%>) c/semantics.c and > c/c-typeck.c, a translator points out one of a number of kinds > of cosmetic problems that tend to come up late in development, > during translation of GCC messages. Other, arguably more minor, > kinds of issues are caused by forgetting to use proper quoting > when referencing tree nodes, such as %D or %T. > > To help avoid these kinds of bugs, the attached patch enhances > the -Wformat checker to detect these common quoting issues and > report them as warnings. As I see it, there are four kinds of format specifiers in this context: * Specifiers that have no special interaction with quoting and may be used either quoted or unquoted. For example, %d, %s and %E. (%E is only of that kind when used for expressions - when hopefully it should only be used for simple expressions such as constants. Uses for identifiers should be quoted; cases where there can be complex expressions should be replaced by use of location ranges. Ideally identifiers and expressions should have different formats, so these cases can be distinguished and so identifiers can end up with a static type other than "tree".) * Specifiers that should be quoted. For example, %D. * Specifiers that open quoting (%<). * Specifiers that close quoting (%>). (In addition, the q flag serves to quote the specifier it's applied to.) > The remaining six patches in the series correct the problems > highlighted by the warning and get GCC to bootstrap and pass > regression tests on x86_64. I suspect additional fixes will > be needed to get GCC to bootstrap on other targets. I'll do > powerpc64le next but besides a general review I'm looking for > suggestions how to do the same cleanup on all the other targets > with the least disruption. (E.g., if there's a way to roll out > a warning one target at a time I could introduce it under > a suboption of -Wformat and enable the subobption with -Werror > only on the targets that have already been verified.) Use contrib/config-list.mk, with a native compiler with this patch in the PATH, to test building compilers for many configurations. (No doubt you'll also find existing build issues, which may or may not be filed in Bugzilla.) > + /* Diagnose nested or unmatched quoting directives such as GCC's > + "%<...%<" and "%>...%>". As a special case, a directive can > + be considered to both begin and end quoting (e.g., GCC's %E). > + Such a directive is recognized but not diagosed. */ I don't think it makes conceptual sense to consider %E to both begin and end quoting. I'd expect %E to have exactly the same begin_quote and end_quote (or whatever) data as %d and %s, because it has the same properties (may be used quoted or unquoted). > diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h > index 13ca8ea..8932861 100644 > --- a/gcc/c-family/c-format.h > +++ b/gcc/c-family/c-format.h > @@ -132,6 +132,11 @@ struct format_type_detail > struct format_char_info > { >const char *format_chars; > + /* Strings of FORMAT_CHARS characters that begin and end quoting, > + respectively, and pairs of which should be matched in the same > + format string. NULL when no such pairs exist. */ > + const char *quote_begin_chars; > + const char *quote_end_chars; I don't think this comment is sufficiently detailed to make it obvious what should appear in each field for each of the four kinds of format specifiers I enumerated. The best I can reverse-engineer from the code is: NULL for specifiers that may be used either quoted or unquoted *or* listing those specifiers in both quote_begin_chars and quote_end_chars (but I think the option of listing them in both should be removed as conceptually wrong); if the character is an opening or closing quote, list it in the appropriate one; if it must be used quoted, but isn't a quote itself, both strings must be non-NULL but it must not be listed in either. Whether that's right or wrong, the comment needs to make the rules clear. -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH 5/7] clean up quoting problems - c-family (PR 80280 et al.)
On Tue, 2 May 2017, Martin Sebor wrote: > + inform (loc, "in the expansion of concept %qE %qS", check, sub); Are you sure about this (two consecutive quoted strings, open quote of %qS following closing quote of %qE) or should it be a single quoted string %<%E %S%>? -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling
On Wed, May 3, 2017 at 3:41 PM, Bin.Cheng wrote: > On Wed, May 3, 2017 at 3:38 PM, Christophe Lyon > wrote: >> Hi Bin, >> >> >> On 24 April 2017 at 12:26, Richard Biener wrote: >>> On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng wrote: Hi, This patch refactors how invariant variable/expressions are handled. Now they are recorded in the same kind data structure and handled similarly, which makes code easier to understand. Is it OK? >>> >> >> This patch (r247512) caused regression on some arm >> targets/cpu/fpu/runtestflags combinations: >> FAIL:gcc.target/arm/ivopts.c object-size text <= 32 >> >> See: >> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html > Hi Christophe, > Thanks very much for reporting. Seems it's more than pure > refactoring. I will investigate it. Hi, Though I tried to separate patch as much as possible, there are still dependencies in between. This regression will be resolved by later patches. Thanks, bin > > Thanks, > bin >> >> Thanks, >> >> Christophe >> >> >>> Ok. >>> >>> Richard. >>> Thanks, bin 2017-04-11 Bin Cheng * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to inv_vars. Add inv_exprs. (struct iv_cand): Rename depends_on to inv_vars. (struct ivopts_data): Rename max_inv_id/n_invariant_uses to max_inv_var_id/n_inv_var_uses. Move max_inv_expr_id around. Refactor field used_inv_exprs from has_map to array n_inv_expr_uses. (dump_cand): Dump inv_vars. (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs. (record_invariant, find_depends, add_candidate_1): Ditto. (set_group_iv_cost, force_var_cost): Ditto. (split_address_cost, ptr_difference_cost, difference_cost): Ditto. (get_computation_cost_at, get_computation_cost): Ditto. (determine_group_iv_cost_generic): Ditto. (determine_group_iv_cost_address): Ditto. (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto. (determine_group_iv_costs): Ditto. (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size. (iv_ca_set_remove_invariants): Renamed to ... (iv_ca_set_remove_invs): ... this. Support inv_vars and inv_exprs. (iv_ca_set_no_cp): Use iv_ca_set_remove_invs. (iv_ca_set_add_invariants): Renamed to ... (iv_ca_set_add_invs): ... this. Support inv_vars and inv_exprs. (iv_ca_set_cp): Use iv_ca_set_add_invs. (iv_ca_has_deps): Support inv_vars and inv_exprs. (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto. (create_new_ivs): Remove useless dump. gcc/testsuite/ChangeLog 2017-04-11 Bin Cheng * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.
Re: [PATCH] warn on mem calls modifying objects of non-trivial types (PR 80560)
On 04/30/2017 05:39 PM, Joseph Myers wrote: On Sat, 29 Apr 2017, Martin Sebor wrote: +The safe way to either initialize or "reset" objects of non-trivial Should use TeX quotes in Texinfo files, ``reset''. Heh :) I wrestled with Emacs to get rid of those, It kept replacing my plain quotes with the TeX kind but in the end I won. Sounds like I should have trusted it. Let me restore it in the follow-on patch. Martin
Fix bootstrap issue with gcc 4.1
Hi, my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. This is related to fact that using sreal implies a non-trivial constructor and thus ggc_cleared_alloc is no longer standard compliant. I however do not quite understand why GCC 4.1 manages to misoptimize this code but I have checked that this helps to fix the issue and hopefully will re-start our periodic testers. Bootstrapped x86_64-linux and comitted. Honza Index: ipa-inline.h === --- ipa-inline.h(revision 247549) +++ ipa-inline.h(working copy) @@ -175,6 +175,16 @@ struct GTY(()) inline_summary int growth; /* Number of SCC on the beginning of inlining process. */ int scc_no; + + void inline_summary () +: estimated_self_stack_size (0), self_size (0), self_time (0), min_size (0), + inlinable (false), contains_cilk_spawn (false), single_caller (false), + fp_expressions (false), estimated_stack_size (false), + stack_frame_offset (false), time (0), size (0), conds (NULL), + entry (NULL), loop_iterations (NULL), loop_stride (NULL), + array_ined (NULL), growth (0), scc_no (0) +{ +} }; class GTY((user)) inline_summary_t: public function_summary @@ -185,7 +195,7 @@ public: static inline_summary_t *create_ggc (symbol_table *symtab) { -struct inline_summary_t *summary = new (ggc_cleared_alloc ()) +struct inline_summary_t *summary = new (ggc_alloc ()) inline_summary_t(symtab, true); summary->disable_insertion_hook (); return summary;
Re: Fix bootstrap issue with gcc 4.1
On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: > Hi, > my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. This is > related to fact that using sreal implies a non-trivial constructor and thus > ggc_cleared_alloc is no longer standard compliant. I however do not quite > understand > why GCC 4.1 manages to misoptimize this code but I have checked that this > helps > to fix the issue and hopefully will re-start our periodic testers. Is store-merging pass able to optimize that back into reasonable code (sure, not into ggc_cleared_alloc)? Jakub
RE: [PATCH][x86] Add missing intrinsics for ADD[SD,SS] and SUB[SD,SS]
Thank you! Sebastian -Original Message- From: Uros Bizjak [mailto:ubiz...@gmail.com] Sent: Tuesday, May 2, 2017 3:08 PM To: Peryt, Sebastian Cc: gcc-patches@gcc.gnu.org; kirill.yuk...@gmail.com Subject: Re: [PATCH][x86] Add missing intrinsics for ADD[SD,SS] and SUB[SD,SS] On Tue, May 2, 2017 at 11:39 AM, Peryt, Sebastian wrote: > Hi, > Can you please commit it for me? Done. Uros.
[PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP
This is the first of 3-5 patches to address pr78496. The goal of these patches is to catch jump threads earlier in the pipeline to avoid undesirable behavior in PRE and more generally be able to exploit the secondary opportunities exposed by jump threading. One of the more serious issues I found while investigating 78496 was VRP failing to find what should have been obvious jump threads. The fundamental issue is VRP will simplify conditionals which are fed by a typecast prior to jump threading. So something like this: x = (typecast) y; if (x == 42) Can often be transformed into: if (y == 42) The problem is any ASSERT_EXPRS after the conditional will reference "x" rather than "y". That in turn makes it impossible for VRP to use those ASSERT_EXPRs to thread later jumps that use x == More concretely consider this gimple code: ;; basic block 5, loop depth 0, count 0, freq 1, maybe hot ;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED) ;;pred: 3 [50.0%] (TRUE_VALUE,EXECUTABLE) ;;4 [100.0%] (FALLTHRU,EXECUTABLE) # iftmp.0_2 = PHI <1(3), 0(4)> in_loop_7 = (unsigned char) iftmp.0_2; if (in_loop_7 != 0) goto ; [33.00%] else goto ; [67.00%] ;;succ: 6 [33.0%] (TRUE_VALUE,EXECUTABLE) ;;12 [67.0%] (FALSE_VALUE,EXECUTABLE) ;; basic block 12, loop depth 0, count 0, freq 6700, maybe hot ;;prev block 5, next block 6, flags: (NEW) ;;pred: 5 [67.0%] (FALSE_VALUE,EXECUTABLE) in_loop_15 = ASSERT_EXPR ; goto ; [100.00%] ;;succ: 7 [100.0%] (FALLTHRU) ;; basic block 6, loop depth 0, count 0, freq 3300, maybe hot ;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED) ;;pred: 5 [33.0%] (TRUE_VALUE,EXECUTABLE) in_loop_14 = ASSERT_EXPR ; simple_iv (); ;;succ: 7 [100.0%] (FALLTHRU,EXECUTABLE) And later we have: ;; basic block 9, loop depth 0, count 0, freq 8476, maybe hot ;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED) ;;pred: 7 [84.8%] (FALSE_VALUE,EXECUTABLE) if (in_loop_7 == 0) goto ; [36.64%] else goto ; [63.36%] VRP knows it can replace the uses of in_loop_7 in the conditionals in blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading (but well after ASSERT_EXPR insertion). As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 (which reference in_loop_7) to thread the jump at bb9 (which now references iftmp.0_2). The cases in pr78496 are slightly more complex, but boil down to the same core issue -- simplifying the conditional too early. Thankfully this is easy to fix. We just split the conditional simplification into two steps so that the transformation noted above occurs after jump threading (the other simplifications we want to occur before jump threading). This allows VRP1 to pick up 27 missed jump threads in the testcase from 78496. It could well be enough to address 78496, but since we don't have a solid description of the desired end result I won't consider 78496 fixed quite yet as there's significant further improvements we can make. Bootstrapped and regression tested on x86_64-linux-gnu. Installing on the trunk. Jeff
Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP
[ With the patch attached... ] On 05/03/2017 10:31 AM, Jeff Law wrote: This is the first of 3-5 patches to address pr78496. The goal of these patches is to catch jump threads earlier in the pipeline to avoid undesirable behavior in PRE and more generally be able to exploit the secondary opportunities exposed by jump threading. One of the more serious issues I found while investigating 78496 was VRP failing to find what should have been obvious jump threads. The fundamental issue is VRP will simplify conditionals which are fed by a typecast prior to jump threading. So something like this: x = (typecast) y; if (x == 42) Can often be transformed into: if (y == 42) The problem is any ASSERT_EXPRS after the conditional will reference "x" rather than "y". That in turn makes it impossible for VRP to use those ASSERT_EXPRs to thread later jumps that use x == More concretely consider this gimple code: ;; basic block 5, loop depth 0, count 0, freq 1, maybe hot ;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED) ;;pred: 3 [50.0%] (TRUE_VALUE,EXECUTABLE) ;;4 [100.0%] (FALLTHRU,EXECUTABLE) # iftmp.0_2 = PHI <1(3), 0(4)> in_loop_7 = (unsigned char) iftmp.0_2; if (in_loop_7 != 0) goto ; [33.00%] else goto ; [67.00%] ;;succ: 6 [33.0%] (TRUE_VALUE,EXECUTABLE) ;;12 [67.0%] (FALSE_VALUE,EXECUTABLE) ;; basic block 12, loop depth 0, count 0, freq 6700, maybe hot ;;prev block 5, next block 6, flags: (NEW) ;;pred: 5 [67.0%] (FALSE_VALUE,EXECUTABLE) in_loop_15 = ASSERT_EXPR ; goto ; [100.00%] ;;succ: 7 [100.0%] (FALLTHRU) ;; basic block 6, loop depth 0, count 0, freq 3300, maybe hot ;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED) ;;pred: 5 [33.0%] (TRUE_VALUE,EXECUTABLE) in_loop_14 = ASSERT_EXPR ; simple_iv (); ;;succ: 7 [100.0%] (FALLTHRU,EXECUTABLE) And later we have: ;; basic block 9, loop depth 0, count 0, freq 8476, maybe hot ;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED) ;;pred: 7 [84.8%] (FALSE_VALUE,EXECUTABLE) if (in_loop_7 == 0) goto ; [36.64%] else goto ; [63.36%] VRP knows it can replace the uses of in_loop_7 in the conditionals in blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading (but well after ASSERT_EXPR insertion). As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 (which reference in_loop_7) to thread the jump at bb9 (which now references iftmp.0_2). The cases in pr78496 are slightly more complex, but boil down to the same core issue -- simplifying the conditional too early. Thankfully this is easy to fix. We just split the conditional simplification into two steps so that the transformation noted above occurs after jump threading (the other simplifications we want to occur before jump threading). This allows VRP1 to pick up 27 missed jump threads in the testcase from 78496. It could well be enough to address 78496, but since we don't have a solid description of the desired end result I won't consider 78496 fixed quite yet as there's significant further improvements we can make. Bootstrapped and regression tested on x86_64-linux-gnu. Installing on the trunk. Jeff diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 92a4e395ba8..32cc81978df 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-05-03 Jeff Law + + PR tree-optimization/78496 + * tree-vrp.c (simplify_cond_using_ranges_1): Renamed + from simplify_cond_using_ranges. Split off code to walk + backwards through casts into ... + (simplify_cond_using_ranges_2): New function. + (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1. + (execute_vrp): After identifying jump threads, call + simplify_cond_using_ranges_2. + 2017-05-03 Jeff Downs Rainer Orth diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 596468d33e9..55a44e4635a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-05-03 Jeff Law + + PR tree-optimization/78496 + * gcc.dg/tree-ssa/ssa-thread-15.c: New test. + 2017-05-03 Uros Bizjak * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c new file mode 100644 index 000..f73268cacbb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-vrp1" } */ + +/* We should thread the if (!in_loop) completely leaving + just two conditionals. */ +/* { dg-final { scan-tree-dump-times "if \\(" 2 "vrp1" } } */ + + +union tree_node; +typedef union tree_node *tree; + +enum size_type_kind +{ + SIZETYPE, + SSIZETYPE, + BITSIZETYPE, + SBITSIZETYPE, +
Re: Fix bootstrap issue with gcc 4.1
> On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: > > Hi, > > my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. This > > is > > related to fact that using sreal implies a non-trivial constructor and thus > > ggc_cleared_alloc is no longer standard compliant. I however do not quite > > understand > > why GCC 4.1 manages to misoptimize this code but I have checked that this > > helps > > to fix the issue and hopefully will re-start our periodic testers. > > Is store-merging pass able to optimize that back into reasonable code > (sure, not into ggc_cleared_alloc)? It is still a sequence of assignments to 0 in .optimized pass. Honza > > Jakub
Re: Fix bootstrap issue with gcc 4.1
On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek wrote: >On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: >> Hi, >> my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. > This is >> related to fact that using sreal implies a non-trivial constructor >and thus >> ggc_cleared_alloc is no longer standard compliant. I however do not >quite understand >> why GCC 4.1 manages to misoptimize this code but I have checked that >this helps >> to fix the issue and hopefully will re-start our periodic testers. > >Is store-merging pass able to optimize that back into reasonable code >(sure, not into ggc_cleared_alloc)? Whether it is or not, the previous code was buggy. The zeroing does not prevail until after the object construction begins (not sure whether for PODs this would be different). Richard. > Jakub
Re: Fix bootstrap issue with gcc 4.1
On Wed, May 03, 2017 at 06:39:18PM +0200, Jan Hubicka wrote: > > On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: > > > Hi, > > > my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. > > > This is > > > related to fact that using sreal implies a non-trivial constructor and > > > thus > > > ggc_cleared_alloc is no longer standard compliant. I however do not > > > quite understand > > > why GCC 4.1 manages to misoptimize this code but I have checked that this > > > helps > > > to fix the issue and hopefully will re-start our periodic testers. > > > > Is store-merging pass able to optimize that back into reasonable code > > (sure, not into ggc_cleared_alloc)? > > It is still a sequence of assignments to 0 in .optimized pass. Including the bool fields that are adjacent? Jakub
Re: Fix bootstrap issue with gcc 4.1
On Wed, May 03, 2017 at 06:44:46PM +0200, Richard Biener wrote: > On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek wrote: > >On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: > >> Hi, > >> my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0. > > This is > >> related to fact that using sreal implies a non-trivial constructor > >and thus > >> ggc_cleared_alloc is no longer standard compliant. I however do not > >quite understand > >> why GCC 4.1 manages to misoptimize this code but I have checked that > >this helps > >> to fix the issue and hopefully will re-start our periodic testers. > > > >Is store-merging pass able to optimize that back into reasonable code > >(sure, not into ggc_cleared_alloc)? > > Whether it is or not, the previous code was buggy. The zeroing does not > prevail until after the object construction begins (not sure whether for > PODs this would be different). Sure, I'm not questioning the patch, just wondering if we shouldn't improve store-merging further (we want to do it anyway for e.g. bitop adjacent operations etc.). Jakub
Re: [RFC PATCH 0/3] Call summary class
On Mon, Feb 27, 2017 at 05:36:48PM +0100, Martin Jambor wrote: > Hello, > > the patch sequence in this thread adds a call_summary class, which is > analogous to function_summary we already have but which gathers > information about call graph edges, rather than nodes. > > The first patch implements the class itself, the second modifies > ipa-prop.[ch] and ipa-cp.c to use it instead of a vector indexed by > edge->uid and the third patch is a semi-related cleanup I spotted > along the way. > > I have LTO-bootstrapped and tested the patches on x86_64-linux and > successfully LTO-built Mozilla Firefox with it. I'll be grateful for > any comments, otherwise I'll queue the series for the next stage1. > After Honza re-approved the patches in person, I have committed them as revisions 247557, 247558 and 247559. Thanks, Martin > > Martin Jambor (3): > call_summary to keep info about cgraph_edges > Use call_summary in ipa-prop and ipa-cp > Remove ipa_update_after_lto_read > > gcc/ipa-cp.c | 4 - > gcc/ipa-inline-analysis.c | 2 +- > gcc/ipa-inline.c | 3 - > gcc/ipa-profile.c | 2 +- > gcc/ipa-prop.c| 82 -- > gcc/ipa-prop.h| 60 + > gcc/symbol-summary.h | 214 > +- > 7 files changed, 273 insertions(+), 94 deletions(-) > > -- > 2.11.1
Re: Fix bootstrap issue with gcc 4.1
On May 3, 2017 6:46:05 PM GMT+02:00, Jakub Jelinek wrote: >On Wed, May 03, 2017 at 06:44:46PM +0200, Richard Biener wrote: >> On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek >wrote: >> >On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote: >> >> Hi, >> >> my change to sreals makes GCC to be miscompiled with GCC 4.1 and >-O0. >> > This is >> >> related to fact that using sreal implies a non-trivial constructor >> >and thus >> >> ggc_cleared_alloc is no longer standard compliant. I however do >not >> >quite understand >> >> why GCC 4.1 manages to misoptimize this code but I have checked >that >> >this helps >> >> to fix the issue and hopefully will re-start our periodic testers. >> > >> >Is store-merging pass able to optimize that back into reasonable >code >> >(sure, not into ggc_cleared_alloc)? >> >> Whether it is or not, the previous code was buggy. The zeroing does >not >> prevail until after the object construction begins (not sure whether >for >> PODs this would be different). > >Sure, I'm not questioning the patch, just wondering if we shouldn't >improve >store-merging further (we want to do it anyway for e.g. bitop adjacent >operations etc.). We definitely want to do that. It should also 'nicely' merge with bswap for gathering the load side of a piecewise memory to memory copy. Richard. > > Jakub
Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP
On Wed, May 3, 2017 at 5:32 PM, Jeff Law wrote: > [ With the patch attached... ] > > > On 05/03/2017 10:31 AM, Jeff Law wrote: >> >> This is the first of 3-5 patches to address pr78496. >> >> The goal of these patches is to catch jump threads earlier in the pipeline >> to avoid undesirable behavior in PRE and more generally be able to exploit >> the secondary opportunities exposed by jump threading. >> >> One of the more serious issues I found while investigating 78496 was VRP >> failing to find what should have been obvious jump threads. The fundamental >> issue is VRP will simplify conditionals which are fed by a typecast prior to >> jump threading. So something like this: >> >> x = (typecast) y; >> if (x == 42) >> >> Can often be transformed into: >> >> if (y == 42) >> >> >> The problem is any ASSERT_EXPRS after the conditional will reference "x" >> rather than "y". That in turn makes it impossible for VRP to use those >> ASSERT_EXPRs to thread later jumps that use x == >> >> >> More concretely consider this gimple code: >> >> >> ;; basic block 5, loop depth 0, count 0, freq 1, maybe hot >> ;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED) >> ;;pred: 3 [50.0%] (TRUE_VALUE,EXECUTABLE) >> ;;4 [100.0%] (FALLTHRU,EXECUTABLE) >># iftmp.0_2 = PHI <1(3), 0(4)> >>in_loop_7 = (unsigned char) iftmp.0_2; >>if (in_loop_7 != 0) >> goto ; [33.00%] >>else >> goto ; [67.00%] >> >> ;;succ: 6 [33.0%] (TRUE_VALUE,EXECUTABLE) >> ;;12 [67.0%] (FALSE_VALUE,EXECUTABLE) >> >> ;; basic block 12, loop depth 0, count 0, freq 6700, maybe hot >> ;;prev block 5, next block 6, flags: (NEW) >> ;;pred: 5 [67.0%] (FALSE_VALUE,EXECUTABLE) >>in_loop_15 = ASSERT_EXPR ; >>goto ; [100.00%] >> ;;succ: 7 [100.0%] (FALLTHRU) >> >> ;; basic block 6, loop depth 0, count 0, freq 3300, maybe hot >> ;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED) >> ;;pred: 5 [33.0%] (TRUE_VALUE,EXECUTABLE) >>in_loop_14 = ASSERT_EXPR ; >>simple_iv (); >> ;;succ: 7 [100.0%] (FALLTHRU,EXECUTABLE) >> >> And later we have: >> >> ;; basic block 9, loop depth 0, count 0, freq 8476, maybe hot >> ;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED) >> ;;pred: 7 [84.8%] (FALSE_VALUE,EXECUTABLE) >>if (in_loop_7 == 0) >> goto ; [36.64%] >>else >> goto ; [63.36%] >> >> VRP knows it can replace the uses of in_loop_7 in the conditionals in >> blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading >> (but well after ASSERT_EXPR insertion). >> >> As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 >> (which reference in_loop_7) to thread the jump at bb9 (which now references >> iftmp.0_2). >> >> >> The cases in pr78496 are slightly more complex, but boil down to the same >> core issue -- simplifying the conditional too early. >> >> Thankfully this is easy to fix. We just split the conditional >> simplification into two steps so that the transformation noted above occurs >> after jump threading (the other simplifications we want to occur before jump >> threading). >> >> This allows VRP1 to pick up 27 missed jump threads in the testcase from >> 78496. It could well be enough to address 78496, but since we don't have a >> solid description of the desired end result I won't consider 78496 fixed >> quite yet as there's significant further improvements we can make. >> >> Bootstrapped and regression tested on x86_64-linux-gnu. Installing on the >> trunk. >> >> Jeff >> > > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 92a4e395ba8..32cc81978df 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,14 @@ > +2017-05-03 Jeff Law > + > + PR tree-optimization/78496 > + * tree-vrp.c (simplify_cond_using_ranges_1): Renamed > + from simplify_cond_using_ranges. Split off code to walk > + backwards through casts into ... > + (simplify_cond_using_ranges_2): New function. > + (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1. > + (execute_vrp): After identifying jump threads, call > + simplify_cond_using_ranges_2. > + > 2017-05-03 Jeff Downs > Rainer Orth > > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 596468d33e9..55a44e4635a 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,8 @@ > +2017-05-03 Jeff Law > + > + PR tree-optimization/78496 > + * gcc.dg/tree-ssa/ssa-thread-15.c: New test. > + > 2017-05-03 Uros Bizjak > > * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints. > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c > b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c > new file mode 100644 > index 000..f73268cacbb > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c > @@ -0,0 +1,51 @@ > +/* { dg-do
[committed] Fix typo in common.opt
Successfully bootstrapped®rtested on x86_64-pc-linux-gnu. Committed to trunk as r247562. gcc/ChangeLog: * common.opt (fdiagnostics-parseable-fixits): Fix typo. --- gcc/common.opt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/common.opt b/gcc/common.opt index 4021622..5817407 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1225,7 +1225,7 @@ Enum(diagnostic_color_rule) String(auto) Value(DIAGNOSTICS_COLOR_AUTO) fdiagnostics-parseable-fixits Common Var(flag_diagnostics_parseable_fixits) -Print fixit hints in machine-readable form. +Print fix-it hints in machine-readable form. fdiagnostics-generate-patch Common Var(flag_diagnostics_generate_patch) -- 1.8.5.3
Re: C++ PATCH to fix ICE in replace_placeholders_r (PR c++/79937)
On Tue, Apr 25, 2017 at 12:17 PM, Marek Polacek wrote: > On Fri, Apr 07, 2017 at 03:27:36PM -0400, Jason Merrill wrote: >> On Fri, Mar 24, 2017 at 12:22 PM, Marek Polacek wrote: >> > On Thu, Mar 23, 2017 at 05:09:58PM -0400, Jason Merrill wrote: >> >> On Thu, Mar 23, 2017 at 4:34 PM, Marek Polacek wrote: >> >> > On Tue, Mar 14, 2017 at 02:34:30PM -0400, Jason Merrill wrote: >> >> >> On Tue, Mar 14, 2017 at 2:33 PM, Jason Merrill >> >> >> wrote: >> >> >> > On Tue, Mar 7, 2017 at 12:10 PM, Marek Polacek >> >> >> > wrote: >> >> >> >> In this testcase we have >> >> >> >> C c = bar (X{1}); >> >> >> >> which store_init_value sees as >> >> >> >> c = TARGET_EXPR > >> >> >> .n=(&)->i}>)> >> >> >> >> i.e. we're initializing "c" with a TARGET_EXPR. We call >> >> >> >> replace_placeholders >> >> >> >> that walks the whole tree to substitute the placeholders. >> >> >> >> Eventually we find >> >> >> >> the nested but that's for another >> >> >> >> object, so we >> >> >> >> crash. Seems that we shouldn't have stepped into the second >> >> >> >> TARGET_EXPR at >> >> >> >> all; it has nothing to with "c", it's bar's argument. >> >> >> >> >> >> >> >> It occurred to me that we shouldn't step into CALL_EXPRs and leave >> >> >> >> the >> >> >> >> placeholders in function arguments to cp_gimplify_init_expr which >> >> >> >> calls >> >> >> >> replace_placeholders for constructors. Not sure if it's enough to >> >> >> >> handle >> >> >> >> CALL_EXPRs like this, anything else? >> >> >> > >> >> >> > Hmm, we might have a DMI containing a call with an argument referring >> >> >> > to *this, i.e. >> >> >> > >> >> >> > struct A >> >> >> > { >> >> >> > int i; >> >> >> > int j = frob (this->i); >> >> >> > }; >> >> >> > >> >> >> > The TARGET_EXPR seems like a more likely barrier, but even there we >> >> >> > could have something like >> >> >> > >> >> >> > struct A { int i; }; >> >> >> > struct B >> >> >> > { >> >> >> > int i; >> >> >> > A a = A{this->i}; >> >> >> > }; >> >> >> > >> >> >> > I think we need replace_placeholders to keep a stack of objects, so >> >> >> > that when we see a TARGET_EXPR we add it to the stack and therefore >> >> >> > can properly replace a PLACEHOLDER_EXPR of its type. >> >> >> >> >> >> Or actually, avoid replacing such a PLACEHOLDER_EXPR, but rather leave >> >> >> it for later when we lower the TARGET_EXPR. >> >> > >> >> > Sorry, I don't really follow. I have a patch that puts TARGET_EXPRs on >> >> > a stack, but I don't know how that helps. E.g. with nsdmi-aggr3.C >> >> > we have >> >> > B b = TARGET_EXPR > >> > &>}> >> >> > so when we get to that PLACEHOLDER_EXPR, on the stack there's >> >> > TARGET_EXPR with type struct A >> >> > TARGET_EXPR with type struct B >> >> > so the type of the PLACEHOLDER_EXPR doesn't match the type of the >> >> > current >> >> > TARGET_EXPR, but we still want to replace it in this case. >> >> > >> >> > So -- could you expand a bit on what you had in mind, please? >> >> >> >> So then when we see a placeholder, we walk the stack to find the >> >> object of the matching type. >> >> >> >> But if the object we find was collected from walking through a >> >> TARGET_EXPR, we should leave the PLACEHOLDER_EXPR alone, so that it >> >> can be replaced later with the actual target of the initialization. >> > >> > Unfortunately, I still don't understand; guess I'll have to drop this PR. >> > >> > With this we put TARGET_EXPRs on a stack, and then when we find a >> > PLACEHOLDER_EXPR we walk the stack to find a TARGET_EXPR of the same type >> > as >> > the PLACEHOLDER_EXPR. There are three simplified examples I've been >> > playing >> > with: >> > >> > B b = T_E >}> >> > >> > - here we should replace the P_E; on the stack there are two >> > TARGET_EXPRs of types B and A >> > >> > C c = T_E >)> >> > >> > - here we shouldn't replace the P_E; on the stack there are two >> > TARGET_EXPRs of types X and C >> > >> > B b = T_E }}> >> > >> > - here we should replace the P_E; on the stack there's one TARGET_EXPR >> > of type B >> > >> > In each case we find a TARGET_EXPR of the type of the PLACEHOLDER_EXPR, >> > but I >> > don't see how to decide which PLACEHOLDER_EXPR we should let slide. Sorry >> > for >> > being dense... >> >> I was thinking that we want to replace the type of the first entry in >> the stack (B, C, B respectively), and leave others alone. > > Even that didn't work for me, because we may end up with a case of > > C c = bar (T_E >) Why is that a problem? Your patch doesn't fix this variant: struct X { unsigned i; unsigned n = i; }; unsigned int bar (X x) { return x.n; } int main() { X x = { 2, bar (X{1}) }; if (x.n != 1) __builtin_abort (); } Here we have two X objects, and we need to recognize them as distinct. Jason
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On 05/03/2017 09:53 AM, Joseph Myers wrote: On Tue, 2 May 2017, Martin Sebor wrote: In bug 80280 - Missing closing quote (%>) c/semantics.c and c/c-typeck.c, a translator points out one of a number of kinds of cosmetic problems that tend to come up late in development, during translation of GCC messages. Other, arguably more minor, kinds of issues are caused by forgetting to use proper quoting when referencing tree nodes, such as %D or %T. To help avoid these kinds of bugs, the attached patch enhances the -Wformat checker to detect these common quoting issues and report them as warnings. As I see it, there are four kinds of format specifiers in this context: * Specifiers that have no special interaction with quoting and may be used either quoted or unquoted. For example, %d, %s and %E. (%E is only of that kind when used for expressions - when hopefully it should only be used for simple expressions such as constants. Uses for identifiers should be quoted; cases where there can be complex expressions should be replaced by use of location ranges. Ideally identifiers and expressions should have different formats, so these cases can be distinguished and so identifiers can end up with a static type other than "tree".) * Specifiers that should be quoted. For example, %D. * Specifiers that open quoting (%<). * Specifiers that close quoting (%>). (In addition, the q flag serves to quote the specifier it's applied to.) Thanks for comments. That above matches my view. The remaining six patches in the series correct the problems highlighted by the warning and get GCC to bootstrap and pass regression tests on x86_64. I suspect additional fixes will be needed to get GCC to bootstrap on other targets. I'll do powerpc64le next but besides a general review I'm looking for suggestions how to do the same cleanup on all the other targets with the least disruption. (E.g., if there's a way to roll out a warning one target at a time I could introduce it under a suboption of -Wformat and enable the subobption with -Werror only on the targets that have already been verified.) Use contrib/config-list.mk, with a native compiler with this patch in the PATH, to test building compilers for many configurations. (No doubt you'll also find existing build issues, which may or may not be filed in Bugzilla.) I can do some of it but not all of it. The work doesn't involve just building the compiler but also running the tests and fixing up regressions in those that are written to expect the unquoted diagnostics. I don't have the ability to run the test suite on all the targets, or the bandwidth to run it on a subset of them beyond powerpc64 and x86_64. So I'm hoping to find a way for the core of the patch to be checked in and for the cleanup to proceed subsequently, as target maintainers find time. + /* Diagnose nested or unmatched quoting directives such as GCC's +"%<...%<" and "%>...%>". As a special case, a directive can +be considered to both begin and end quoting (e.g., GCC's %E). +Such a directive is recognized but not diagosed. */ I don't think it makes conceptual sense to consider %E to both begin and end quoting. I'd expect %E to have exactly the same begin_quote and end_quote (or whatever) data as %d and %s, because it has the same properties (may be used quoted or unquoted). Yes, that's exactly how %E is treated by the code. The special case the comment above refers to is meant to explain how the begin_quote and end_quote local variables get set, not describe a concept in the design. I renamed the variables so as not to suggest otherwise. diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h index 13ca8ea..8932861 100644 --- a/gcc/c-family/c-format.h +++ b/gcc/c-family/c-format.h @@ -132,6 +132,11 @@ struct format_type_detail struct format_char_info { const char *format_chars; + /* Strings of FORMAT_CHARS characters that begin and end quoting, + respectively, and pairs of which should be matched in the same + format string. NULL when no such pairs exist. */ + const char *quote_begin_chars; + const char *quote_end_chars; I don't think this comment is sufficiently detailed to make it obvious what should appear in each field for each of the four kinds of format specifiers I enumerated. The best I can reverse-engineer from the code is: NULL for specifiers that may be used either quoted or unquoted *or* listing those specifiers in both quote_begin_chars and quote_end_chars (but I think the option of listing them in both should be removed as conceptually wrong); if the character is an opening or closing quote, list it in the appropriate one; if it must be used quoted, but isn't a quote itself, both strings must be non-NULL but it must not be listed in either. Whether that's right or wrong, the comment needs to make the rules clear. I've clarified the comment. Thanks Martin PR translation/80280 - Missing closing quote (%>) c/s
Re: [PATCH] Fix ICE in modified_type_die (PR debug/80461)
On Wed, Apr 19, 2017 at 12:29 PM, Jeff Law wrote: > On 04/19/2017 10:13 AM, Jakub Jelinek wrote: >> >> On Wed, Apr 19, 2017 at 09:55:19AM -0600, Jeff Law wrote: >>> >>> On 04/19/2017 09:10 AM, Jakub Jelinek wrote: On Wed, Apr 19, 2017 at 08:57:52AM -0600, Jeff Law wrote: >> >> 2017-04-19 Jakub Jelinek >> >> PR debug/80461 >> * dwarf2out.c (modified_type_die, gen_type_die_with_usage): >> Check for t with zero TYPE_QUALS_NO_ADDR_SPACE. >> >> * g++.dg/debug/pr80461.C: New test. > > I'm going to assume your use of TYPE_QUALS_NO_ADDR_SPACE vs TYPE_QUALS > or > TYPE_QUALS_NO_ADDR_SPACE_NO_ATOMIC is correct. I don't really know. For address space quals I think one would need to have pointer-to-members in different code address spaces, not sure if we support anything like that. And atomic is C only which doesn't have pointer-to-members. >>> >>> To put it another way, in your message you indicated that >>> modified_type_die >>> expects either an unqualified type or the type itself and that your patch >>> makes sure we only pick unqualified types. >>> >>> Using TYPE_QUALS_NO_ADDR_SPACE like you have seems to conflict with those >>> statements. So I'm curious why you allow address space qualifiers to >>> pass >>> through, but no others. It just seems odd. >> >> >> I used TYPE_QUALS_NO_ADDR_SPACE because that seems to be used in >> modified_type_die elsewhere (dwarf2out.c has only one use of TYPE_QUALS >> and >> even that one ignores addr space bits, as it is masked with qual_mask; >> the rest just TYPE_QUALS_NO_ADDR_SPACE). I think with FUNCTION/METHOD >> types >> with addr space quals we end up in grossly untested territory that most >> likely will just not work at all. I think we don't do anything with the >> address space quals right now, in the future DWARF has segment identifiers >> and something like that could be emitted, but there needs to be ABI >> agreement on what it means. > > OK. So let's go with your patch -- my reading of modified_type_die was that > it would ignore address-space qualifiers. So I think your patch is safe, it > was the inconsistency in the message and the patch itself I was most > concerned about. BTW, I think it would be a bit more correct to compare the quals with those of "main" rather than compare them to 0, though I suspect there aren't currently any types for which that produces a different result. Jason
Re: [PATCH] [PR79542][Ada] Fix ICE in dwarf2out.c with nested func. inlining
On Tue, Mar 14, 2017 at 8:24 AM, Pierre-Marie de Rodat wrote: > Hello, > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79542 reports an ICE in > dwarf2out.c for an Ada testcase built with optimization. > > This crash happens during the late generation pass because > add_gnat_descriptive_type cannot find the type DIE corresponding to some > descriptive type after having tried to generate it. This is because the > DIE was generated during the early generation pass, but then pruned by > the type pruning machinery. So why was it pruned? > > We are in a situation where we have cloned types (because of inlining, > IIUC) whose TYPE_NAME have non-null DECL_ABSTRACT_ORIGIN attributes. As > a consequence: > > * In modified_type_die, the "handle C typedef types" part calls > gen_type_die on the cloned type. > > * gen_type_die matches a typedef variant, and then calls gen_decl_die > on its TYPE_NAME, which will end up calling gen_typedef_die. > > * gen_typedef_die checks decl_ultimate_origin for this TYPE_DECL, and > finds one, so it only adds a DW_AT_abstract_origin attribute to the > DW_TAG_typedef DIE, but the cloned type itself does not get its own > DIE. That seems like a bug; if gen_typedef_die is going to generate a DIE for a cloned typedef, it needs to associate the type with the DIE. > * Back in modified_type_die, the call to lookup_type_die on the type > passed to gen_type_die returns NULL. > In the end, whole type trees, i.e. the ones referenced by > DECL_ABSTRACT_ORIGIN attributes, are never referenced from type pruning > "roots" and are thus pruned. The descriptive type at stake here is one > of them, hence the assertion failure. > > This patch attemps to fix that with what seems to be the most sensible > thing to do in my opinion: updating the "handle C typedef types" part in > modified_type_die to check decl_ultimate_origin before calling > gen_type_die: if that function returns something not null, then we know > that gen_type_die/gen_typedef_die will not generate a DIE for the input > type, so we try to process the ultimate origin instead. This soundsn good; the DWARF standard says that we don't need to have a die at all for the cloned typedef. > @@ -12496,6 +12496,18 @@ modified_type_die (tree type, int cv_quals, bool > reverse, > >if (qualified_type == dtype) > { > + tree origin > + = TYPE_NAME (qualified_type) == NULL > +? NULL > +: decl_ultimate_origin (TYPE_NAME (qualified_type)); This is unnecessarily complicated; at this point we already know that TYPE_NAME (qualified_type) is non-null and in the variable "name". > + /* Typedef variants that have an abstract origin don't get their own > +type DIE (see gen_typedef_die), so fall back on the ultimate gen_typedef_die does create a DIE for them, it just doesn't do it properly. But we could change gen_typedef_die to abort in that case, making this comment correct. Jason
Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP
On 05/03/2017 10:55 AM, Bin.Cheng wrote: On Wed, May 3, 2017 at 5:32 PM, Jeff Law wrote: [ With the patch attached... ] On 05/03/2017 10:31 AM, Jeff Law wrote: This is the first of 3-5 patches to address pr78496. The goal of these patches is to catch jump threads earlier in the pipeline to avoid undesirable behavior in PRE and more generally be able to exploit the secondary opportunities exposed by jump threading. One of the more serious issues I found while investigating 78496 was VRP failing to find what should have been obvious jump threads. The fundamental issue is VRP will simplify conditionals which are fed by a typecast prior to jump threading. So something like this: x = (typecast) y; if (x == 42) Can often be transformed into: if (y == 42) The problem is any ASSERT_EXPRS after the conditional will reference "x" rather than "y". That in turn makes it impossible for VRP to use those ASSERT_EXPRs to thread later jumps that use x == More concretely consider this gimple code: ;; basic block 5, loop depth 0, count 0, freq 1, maybe hot ;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED) ;;pred: 3 [50.0%] (TRUE_VALUE,EXECUTABLE) ;;4 [100.0%] (FALLTHRU,EXECUTABLE) # iftmp.0_2 = PHI <1(3), 0(4)> in_loop_7 = (unsigned char) iftmp.0_2; if (in_loop_7 != 0) goto ; [33.00%] else goto ; [67.00%] ;;succ: 6 [33.0%] (TRUE_VALUE,EXECUTABLE) ;;12 [67.0%] (FALSE_VALUE,EXECUTABLE) ;; basic block 12, loop depth 0, count 0, freq 6700, maybe hot ;;prev block 5, next block 6, flags: (NEW) ;;pred: 5 [67.0%] (FALSE_VALUE,EXECUTABLE) in_loop_15 = ASSERT_EXPR ; goto ; [100.00%] ;;succ: 7 [100.0%] (FALLTHRU) ;; basic block 6, loop depth 0, count 0, freq 3300, maybe hot ;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED) ;;pred: 5 [33.0%] (TRUE_VALUE,EXECUTABLE) in_loop_14 = ASSERT_EXPR ; simple_iv (); ;;succ: 7 [100.0%] (FALLTHRU,EXECUTABLE) And later we have: ;; basic block 9, loop depth 0, count 0, freq 8476, maybe hot ;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED) ;;pred: 7 [84.8%] (FALSE_VALUE,EXECUTABLE) if (in_loop_7 == 0) goto ; [36.64%] else goto ; [63.36%] VRP knows it can replace the uses of in_loop_7 in the conditionals in blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading (but well after ASSERT_EXPR insertion). As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 (which reference in_loop_7) to thread the jump at bb9 (which now references iftmp.0_2). The cases in pr78496 are slightly more complex, but boil down to the same core issue -- simplifying the conditional too early. Thankfully this is easy to fix. We just split the conditional simplification into two steps so that the transformation noted above occurs after jump threading (the other simplifications we want to occur before jump threading). This allows VRP1 to pick up 27 missed jump threads in the testcase from 78496. It could well be enough to address 78496, but since we don't have a solid description of the desired end result I won't consider 78496 fixed quite yet as there's significant further improvements we can make. Bootstrapped and regression tested on x86_64-linux-gnu. Installing on the trunk. Jeff diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 92a4e395ba8..32cc81978df 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-05-03 Jeff Law + + PR tree-optimization/78496 + * tree-vrp.c (simplify_cond_using_ranges_1): Renamed + from simplify_cond_using_ranges. Split off code to walk + backwards through casts into ... + (simplify_cond_using_ranges_2): New function. + (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1. + (execute_vrp): After identifying jump threads, call + simplify_cond_using_ranges_2. + 2017-05-03 Jeff Downs Rainer Orth diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 596468d33e9..55a44e4635a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-05-03 Jeff Law + + PR tree-optimization/78496 + * gcc.dg/tree-ssa/ssa-thread-15.c: New test. + 2017-05-03 Uros Bizjak * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c new file mode 100644 index 000..f73268cacbb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-vrp1" } */ + +/* We should thread the if (!in_loop) completely leaving + just two conditionals. */ +/* { dg-final { scan-tree-dump-times "if \\(" 2 "vrp1" } } */ + + +union tree_node; +typedef union tree_node *tree; + +enum
[PATCH, rs6000] Avoid vectorizing versioned copy loops with vectorization factor 2
Hi, We recently became aware of some poor code generation as a result of unprofitable (for POWER) loop vectorization. When a loop is simply copying data with 64-bit loads and stores, vectorizing with 128-bit loads and stores generally does not provide any benefit on modern POWER processors. Furthermore, if there is a requirement to version the loop for aliasing, alignment, etc., the cost of the versioning test is almost certainly a performance loss for such loops. The user code example included such a copy loop, executed only a few times on average, within an outer loop that was executed many times on average, causing a tremendous slowdown. This patch very specifically targets these kinds of loops and no others, and artificially inflates the vectorization cost to ensure vectorization does not appear profitable. This is done within the target model cost hooks to avoid affecting other targets. A new test case is included that demonstrates the refusal to vectorize. We've done SPEC performance testing to verify that the patch does not degrade such workloads. Results were all in the noise range. The customer code performance loss was verified to have been reversed. Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions. Is this ok for trunk? Thanks, Bill [gcc] 2017-05-03 Bill Schmidt * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var. (rs6000_init_cost): Initialize rs6000_vect_nonmem. (rs6000_add_stmt_cost): Update rs6000_vect_nonmem. (rs6000_finish_cost): Avoid vectorizing simple copy loops with VF=2 that require versioning. [gcc/testsuite] 2017-05-03 Bill Schmidt * gcc.target/powerpc/veresioned-copy-loop.c: New file. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 247560) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -5873,6 +5873,8 @@ rs6000_density_test (rs6000_cost_data *data) /* Implement targetm.vectorize.init_cost. */ +static bool rs6000_vect_nonmem; + static void * rs6000_init_cost (struct loop *loop_info) { @@ -5881,6 +5883,7 @@ rs6000_init_cost (struct loop *loop_info) data->cost[vect_prologue] = 0; data->cost[vect_body] = 0; data->cost[vect_epilogue] = 0; + rs6000_vect_nonmem = false; return data; } @@ -5907,6 +5910,19 @@ rs6000_add_stmt_cost (void *data, int count, enum retval = (unsigned) (count * stmt_cost); cost_data->cost[where] += retval; + + /* Check whether we're doing something other than just a copy loop. +Not all such loops may be profitably vectorized; see +rs6000_finish_cost. */ + if ((where == vect_body + && (kind == vector_stmt || kind == vec_to_scalar || kind == vec_perm + || kind == vec_promote_demote || kind == vec_construct + || kind == scalar_to_vec)) + || (where != vect_body + && (kind == vec_to_scalar || kind == vec_perm + || kind == vec_promote_demote || kind == vec_construct + || kind == scalar_to_vec))) + rs6000_vect_nonmem = true; } return retval; @@ -5923,6 +5939,19 @@ rs6000_finish_cost (void *data, unsigned *prologue if (cost_data->loop_info) rs6000_density_test (cost_data); + /* Don't vectorize minimum-vectorization-factor, simple copy loops + that require versioning for any reason. The vectorization is at + best a wash inside the loop, and the versioning checks make + profitability highly unlikely and potentially quite harmful. */ + if (cost_data->loop_info) +{ + loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); + if (!rs6000_vect_nonmem + && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 + && LOOP_REQUIRES_VERSIONING (vec_info)) + cost_data->cost[vect_body] += 1; +} + *prologue_cost = cost_data->cost[vect_prologue]; *body_cost = cost_data->cost[vect_body]; *epilogue_cost = cost_data->cost[vect_epilogue]; Index: gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c === --- gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c (nonexistent) +++ gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c (working copy) @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ + +/* Verify that a pure copy loop with a vectorization factor of two + that requires alignment will not be vectorized. See the cost + model hooks in rs6000.c. */ + +typedef long unsigned int size_t; +typedef unsigned char uint8_t; + +extern void *memcpy (void *__restrict __dest, const void *__restrict __src, + size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2))); + +void foo (void *dstPtr, const void *srcPtr, void *
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On 05/03/2017 12:47 PM, Martin Sebor wrote: I can do some of it but not all of it. The work doesn't involve just building the compiler but also running the tests and fixing up regressions in those that are written to expect the unquoted diagnostics. I don't have the ability to run the test suite on all the targets, or the bandwidth to run it on a subset of them beyond powerpc64 and x86_64. So I'm hoping to find a way for the core of the patch to be checked in and for the cleanup to proceed subsequently, as target maintainers find time. Just a note. It's not terrible to run non-execute tests with cross tools. In fact, it's fairly easy. I'm happy to walk you through the process. Long term I think we want to move away from config-list.mk and instead use a buildbot. I've been experimenting with jenkins.I've got a series of jobs that will build the *-elf, *-rtems, *-gnu and *-linux targets. binutils, then gcc, then newlib/glibc. The newlib (*-elf and *-rtems) pipeline is pretty solid at this point and covers ~70 targets from config-list.mk building through newlib. The glibc pipeline is more problematic, but improving. It's about ready to leave my little cluster of slaves and relocate to the servers in Toronto. It should be fairly straightforward to take that jenkins work and tie in testsuite runs. Even if we just start with compile.exp and expand one day (via the simulators) to the execution tests. Jeff
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On Wed, 3 May 2017, Martin Sebor wrote: > > Use contrib/config-list.mk, with a native compiler with this patch in the > > PATH, to test building compilers for many configurations. (No doubt > > you'll also find existing build issues, which may or may not be filed in > > Bugzilla.) > > I can do some of it but not all of it. The work doesn't involve > just building the compiler but also running the tests and fixing > up regressions in those that are written to expect the unquoted > diagnostics. I don't have the ability to run the test suite on > all the targets, or the bandwidth to run it on a subset of them > beyond powerpc64 and x86_64. So I'm hoping to find a way for > the core of the patch to be checked in and for the cleanup to > proceed subsequently, as target maintainers find time. When it's architecture-specific tests, I think it's up to people testing on those architectures to fix them. > > > diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h > > > index 13ca8ea..8932861 100644 > > > --- a/gcc/c-family/c-format.h > > > +++ b/gcc/c-family/c-format.h > > > @@ -132,6 +132,11 @@ struct format_type_detail > > > struct format_char_info > > > { > > >const char *format_chars; > > > + /* Strings of FORMAT_CHARS characters that begin and end quoting, > > > + respectively, and pairs of which should be matched in the same > > > + format string. NULL when no such pairs exist. */ > > > + const char *quote_begin_chars; > > > + const char *quote_end_chars; > > > > I don't think this comment is sufficiently detailed to make it obvious > > what should appear in each field for each of the four kinds of format > > specifiers I enumerated. The best I can reverse-engineer from the code > > is: NULL for specifiers that may be used either quoted or unquoted *or* > > listing those specifiers in both quote_begin_chars and quote_end_chars > > (but I think the option of listing them in both should be removed as > > conceptually wrong); if the character is an opening or closing quote, list > > it in the appropriate one; if it must be used quoted, but isn't a quote > > itself, both strings must be non-NULL but it must not be listed in either. > > > > Whether that's right or wrong, the comment needs to make the rules clear. > > I've clarified the comment. Clarifying the comment is helpful, but a data structure involving putting the same character in both still doesn't make sense to me. It would seem a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and "EK" cases, where "EK" uses NULL there just like "s", "d" etc. do. -- Joseph S. Myers jos...@codesourcery.com
New German PO file for 'gcc' (version 7.1.0)
Hello, gentle maintainer. This is a message from the Translation Project robot. A revised PO file for textual domain 'gcc' has been submitted by the German team of translators. The file is available at: http://translationproject.org/latest/gcc/de.po (This file, 'gcc-7.1.0.de.po', has just now been sent to you in a separate email.) All other PO files for your package are available in: http://translationproject.org/latest/gcc/ Please consider including all of these in your next release, whether official or a pretest. Whenever you have a new distribution with a new version number ready, containing a newer POT file, please send the URL of that distribution tarball to the address below. The tarball may be just a pretest or a snapshot, it does not even have to compile. It is just used by the translators when they need some extra translation context. The following HTML page has been updated: http://translationproject.org/domain/gcc.html If any question arises, please contact the translation coordinator. Thank you for all your work, The Translation Project robot, in the name of your translation coordinator.
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On 05/03/2017 01:59 PM, Joseph Myers wrote: On Wed, 3 May 2017, Martin Sebor wrote: Use contrib/config-list.mk, with a native compiler with this patch in the PATH, to test building compilers for many configurations. (No doubt you'll also find existing build issues, which may or may not be filed in Bugzilla.) I can do some of it but not all of it. The work doesn't involve just building the compiler but also running the tests and fixing up regressions in those that are written to expect the unquoted diagnostics. I don't have the ability to run the test suite on all the targets, or the bandwidth to run it on a subset of them beyond powerpc64 and x86_64. So I'm hoping to find a way for the core of the patch to be checked in and for the cleanup to proceed subsequently, as target maintainers find time. When it's architecture-specific tests, I think it's up to people testing on those architectures to fix them. diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h index 13ca8ea..8932861 100644 --- a/gcc/c-family/c-format.h +++ b/gcc/c-family/c-format.h @@ -132,6 +132,11 @@ struct format_type_detail struct format_char_info { const char *format_chars; + /* Strings of FORMAT_CHARS characters that begin and end quoting, + respectively, and pairs of which should be matched in the same + format string. NULL when no such pairs exist. */ + const char *quote_begin_chars; + const char *quote_end_chars; I don't think this comment is sufficiently detailed to make it obvious what should appear in each field for each of the four kinds of format specifiers I enumerated. The best I can reverse-engineer from the code is: NULL for specifiers that may be used either quoted or unquoted *or* listing those specifiers in both quote_begin_chars and quote_end_chars (but I think the option of listing them in both should be removed as conceptually wrong); if the character is an opening or closing quote, list it in the appropriate one; if it must be used quoted, but isn't a quote itself, both strings must be non-NULL but it must not be listed in either. Whether that's right or wrong, the comment needs to make the rules clear. I've clarified the comment. Clarifying the comment is helpful, but a data structure involving putting the same character in both still doesn't make sense to me. It would seem a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and "EK" cases, where "EK" uses NULL there just like "s", "d" etc. do. Then the begin/end strings for the "DFTV" entry will be the empty string (to indicate that they are expected to be quoted), as in the attached incremental diff. Let me know if I misunderstood and you had something else in mind. FWIW, I don't mind doing this way if you prefer, but I'm hard pressed to see the improvement. All it did is grow the size of the tables. The code stayed the same. Martin diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c index f64b6e0..ad56835 100644 --- a/gcc/c-family/c-format.c +++ b/gcc/c-family/c-format.c @@ -688,7 +688,8 @@ static const format_char_info gcc_diag_char_table[] = { "K", NULL, NULL, 0, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q","", NULL }, { "r", NULL, NULL, 1, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "","cR", NULL }, - { "<>'R","<'R", ">'R", 0, STD_C89, NOARGUMENTS, "", "", NULL }, + { "<>","<", ">", 0, STD_C89, NOARGUMENTS, "", "", NULL }, + { "'R", NULL, NULL, 0, STD_C89, NOARGUMENTS, "", "", NULL }, { "m", NULL, NULL, 0, STD_C89, NOARGUMENTS, "q", "", NULL }, { NULL, NULL, NULL, 0, STD_C89, NOLENGTHS, NULL, NULL, NULL } }; @@ -706,12 +707,13 @@ static const format_char_info gcc_tdiag_char_table[] = /* Custom conversion specifiers. */ /* These will require a "tree" at runtime. */ - { "DFKTEV", "EK", "EK", 0, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q+", "", NULL }, - + { "DFTV", "", "", 0, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q+", "", NULL }, + { "EK", NULL, NULL, 0, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q+", "", NULL }, { "v", NULL, NULL, 0, STD_C89, { T89_I, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "q#", "", NULL }, { "r", NULL, NULL, 1, STD_C89, { T89_C, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "","cR", NULL }, - { "<>'R", "<'R", ">'R", 0, STD_C89, NOARGUMENTS, "", "", NULL }, + { "<>", "<", ">",0, STD_C89, NOARGUMENTS, "", "", NULL }, + { "'R", NULL, NULL, 0, STD_C89, NOARGUMENTS, "", "", NULL }, { "m", NULL, NU
Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)
On Wed, 3 May 2017, Martin Sebor wrote: > > Clarifying the comment is helpful, but a data structure involving putting > > the same character in both still doesn't make sense to me. It would seem > > a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and > > "EK" cases, where "EK" uses NULL there just like "s", "d" etc. do. > > Then the begin/end strings for the "DFTV" entry will be the empty > string (to indicate that they are expected to be quoted), as in > the attached incremental diff. Let me know if I misunderstood > and you had something else in mind. Yes, that's what I'd expect (incrementally). > FWIW, I don't mind doing this way if you prefer, but I'm hard > pressed to see the improvement. All it did is grow the size of > the tables. The code stayed the same. Really I think it might be better not to have pointers / strings there at all - rather, have a four-state enum value that says directly whether those format specifiers are quote-neutral, should-be-quoted, left-quote or right-quote. Or that information could go in the existing flags2 field, '"' to mean should-be-quoted, '<' to mean left-quote and '>' to mean right-quote, for example. -- Joseph S. Myers jos...@codesourcery.com
[gomp4] Don't mark OpenACC auto loops as independent inside acc parallel regions
The OpenACC 2.5 spec updated the behavior of acc loops inside acc parallel regions such that loop with seq and auto clauses are not implicitly independent. Back in OpenACC 2.0, all loops inside acc parallel regions were implicitly independent. Oddly enough, if the user just places an acc loop without any clauses, it is still implicitly independent. E.g. #pragma acc loop implies #pragma acc loop independent which is not equal to #pragma acc loop auto I suppose the auto flag is used to explicitly have the compiler "automatically" detect loop dependencies and partition the loop accordingly. This patch, which I've applied to gomp-4_0-branch makes GCC comply with this new behavior. Cesar 2017-05-03 Cesar Philippidis gcc/ * omp-low.c (lower_oacc_head_mark): Don't mark OpenACC auto loops as independent inside acc parallel regions. gcc/testsuite/ * c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. * c-c++-common/goacc/loop-auto-2.c: Likewise. * gcc.dg/goacc/loop-processing-1.c: Likewise. * c-c++-common/goacc/loop-auto-3.c: New test. * gfortran.dg/goacc/loop-auto-1.f90: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index cf299c12..9e9a363 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -6638,9 +6638,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses, tag |= OLF_GANG_STATIC; } - /* In a parallel region, loops are implicitly INDEPENDENT. */ + /* In a parallel region, loops without auto and seq clauses are + implicitly INDEPENDENT. */ omp_context *tgt = enclosing_target_ctx (ctx); - if (!tgt || is_oacc_parallel (tgt)) + if ((!tgt || is_oacc_parallel (tgt)) && !(tag & (OLF_SEQ | OLF_AUTO))) tag |= OLF_INDEPENDENT; if (tag & OLF_TILE) diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c index 124befc..dcad07f 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c @@ -10,7 +10,7 @@ void Foo () #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -20,7 +20,7 @@ void Foo () #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -51,7 +51,7 @@ void Foo () #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -64,27 +64,27 @@ void Foo () } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int kx = 0; kx < 10; kx++) {} } } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int lx = 0; lx < 10; lx++) {} } } @@ -101,7 +101,7 @@ void Gang (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -111,7 +111,7 @@ void Gang (void) #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -142,7 +142,7 @@ void Gang (void) #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -176,7 +176,7 @@ void Worker (void) #pragma acc loop seq for (int j
Re: [wwwdocs, ARM, AArch64] Document ABI changes and fixes
On Tue, 2 May 2017, Richard Earnshaw (lists) wrote: > This patch adds some release notes for the gcc ABI changes affecting ARM > and AArch64. This looks fine, thank you. The one thing you may want to look into is where it says "code where class objects are passed by value to functions" which feels a bit odd. Perhaps "...passed to functions by value" or just omit "to functions" given how common of a phrase "passing by value" is? Gerald
[PATCH] Fix -fopt-info documentation in invoke.texi
The description of the default behavour of -fopt-info in invoke.texi is wrong. This patch fixes it. I also added a sentence to explicitly say what is implied by the note that -fopt-info-vec-missed is the same as -fopt-info-missed-vec. Namely, that order doesn't matter. OK to checkin? Steve Ellcey sell...@cavium.com 2017-05-03 Steve Ellcey * doc/invoke.texi (-fopt-info): Fix description of default behavour. Explicitly say order of options included in -fopt-info does not matter. diff --git a/gcc/doc/optinfo.texi b/gcc/doc/optinfo.texi index e17cb37..f4158a0 100644 --- a/gcc/doc/optinfo.texi +++ b/gcc/doc/optinfo.texi @@ -208,16 +208,17 @@ optimized locations from all the inlining passes into If the @var{filename} is provided, then the dumps from all the applicable optimizations are concatenated into the @file{filename}. Otherwise the dump is output onto @file{stderr}. If @var{options} is -omitted, it defaults to @option{all-all}, which means dump all -available optimization info from all the passes. In the following -example, all optimization info is output on to @file{stderr}. +omitted, it defaults to @option{optimized-optall}, which means dump +information about successfully applied optimizations from all the passes. +In the following example, the optimization info is output on to @file{stderr}. @smallexample gcc -O3 -fopt-info @end smallexample Note that @option{-fopt-info-vec-missed} behaves the same as -@option{-fopt-info-missed-vec}. +@option{-fopt-info-missed-vec}. The order of the optimization group +names and message types listed after @option{-fopt-info} does not matter. As another example, consider
Re: [testsuite, committed] Replace absolute line numbers in c-c++-common
On May 3, 2017, at 12:39 AM, Tom de Vries wrote: > diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c > b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c > index 8f14034..7df1804 100644 > --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c > +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c > @@ -7,6 +7,7 @@ enum E { > A = 0 << 1, > B = 1 << 1, > C = -1 << 1, /* { dg-warning "left shift of negative value|not an integer > constant" } */ > + /* { dg-error "left operand of shift expression" "shift" { target c++ } > .-1 } */ > D = 0 >> 1, > E = 1 >> 1, > F = -1 >> 1 > @@ -47,5 +48,3 @@ right (int x) > r += -1U >> x; > return r; > } > - > -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } > */ I like this type of change, thanks.
Cap niter_for_unrolled_loop to upper bound
For the reasons explained in PR77536, niter_for_unrolled_loop assumes 5 iterations in the absence of profiling information, although it doesn't increase beyond the estimate for the original loop. This left a hole in which the new estimate could be less than the old one but still greater than the limit imposed by CEIL (nb_iterations_upper_bound, unroll factor). Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Thanks, Richard gcc/ 2017-05-04 Richard Sandiford * tree-ssa-loop-manip.c (niter_for_unrolled_loop): Add commentary to explain the use of truncating division. Cap the number of iterations to the maximum given by nb_iterations_upper_bound, if defined. gcc/testsuite/ * gcc.dg/vect/vect-profile-1.c: New test. Index: gcc/tree-ssa-loop-manip.c === --- gcc/tree-ssa-loop-manip.c 2017-05-03 08:46:26.068861808 +0100 +++ gcc/tree-ssa-loop-manip.c 2017-05-04 07:41:56.686034705 +0100 @@ -1104,6 +1104,9 @@ niter_for_unrolled_loop (struct loop *lo gcc_assert (factor != 0); bool profile_p = false; gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p); + /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the + "+ 1" converts latch iterations to loop iterations and the "- 1" + converts back. */ gcov_type new_est_niter = est_niter / factor; /* Without profile feedback, loops for which we do not know a better estimate @@ -1120,6 +1123,15 @@ niter_for_unrolled_loop (struct loop *lo new_est_niter = 5; } + if (loop->any_upper_bound) +{ + /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */ + widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound, +factor); + if (wi::ltu_p (bound, new_est_niter)) + new_est_niter = bound.to_uhwi (); +} + return new_est_niter; } Index: gcc/testsuite/gcc.dg/vect/vect-profile-1.c === --- /dev/null 2017-05-04 07:24:39.449302696 +0100 +++ gcc/testsuite/gcc.dg/vect/vect-profile-1.c 2017-05-04 07:41:56.685075916 +0100 @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fdump-tree-vect-details-blocks" } */ + +/* At least one of these should correspond to a full vector. */ + +void +f1 (int *x) +{ + for (int j = 0; j < 2; ++j) +x[j] += 1; +} + +void +f2 (int *x) +{ + for (int j = 0; j < 4; ++j) +x[j] += 1; +} + +void +f3 (int *x) +{ + for (int j = 0; j < 8; ++j) +x[j] += 1; +} + +void +f4 (int *x) +{ + for (int j = 0; j < 16; ++j) +x[j] += 1; +} + +/* { dg-final { scan-tree-dump {goto ; \[0+.0*%\]} vect } } */
Misc. BRIG/HSAIL FE updates
Hi, In r247576 I committed the patch below, which has minor updates and fixes to the BRIG/HSAIL frontend. Index: gcc/brig/brigfrontend/brig-code-entry-handler.cc === --- gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 247575) +++ gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 247576) @@ -464,7 +464,24 @@ uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset); if (offs > 0 || addr == NULL_TREE) { - tree const_offset_2 = build_int_cst (size_type_node, offs); + /* In large mode, the offset is treated as 32bits unless it's + global, readonly or kernarg address space. + See: + http://www.hsafoundation.com/html_spec111/HSA_Library.htm + #PRM/Topics/02_ProgModel/small_and_large_machine_models.htm + #table_machine_model_data_sizes */ + + int is64b_offset = segment == BRIG_SEGMENT_GLOBAL + || segment == BRIG_SEGMENT_READONLY + || segment == BRIG_SEGMENT_KERNARG; + + /* The original offset is signed and should be sign + extended for the pointer arithmetics. */ + tree const_offset_2 = is64b_offset +? build_int_cst (size_type_node, offs) +: convert (long_integer_type_node, + build_int_cst (integer_type_node, offs)); + if (addr == NULL_TREE) addr = const_offset_2; else @@ -1265,6 +1282,10 @@ operand_type = uint32_type_node; half_to_float = false; } + else if (brig_inst.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE && i == 4) + { + operand_type = uint32_type_node; + } else if (half_to_float) /* Treat the operands as the storage type at this point. */ operand_type = half_storage_type; Index: gcc/brig/ChangeLog === --- gcc/brig/ChangeLog (revision 247575) +++ gcc/brig/ChangeLog (revision 247576) @@ -1,3 +1,11 @@ +2017-05-03 Pekka Jääskeläinen + + * brigfrontend/brig-code-entry-handler.cc + (brig_code_entry_handler::build_address_operand): Fix a bug + with reg+offset addressing on 32b segments. In large mode, + the offset is treated as 32bits unless it's global, readonly or + kernarg address space. + 2016-02-01 Pekka Jääskeläinen * brigfrontend/brig-code-entry-handler.cc: fix address Index: gcc/ChangeLog === --- gcc/ChangeLog (revision 247575) +++ gcc/ChangeLog (revision 247576) @@ -1,3 +1,8 @@ +2017-05-04 Pekka Jääskeläinen + + * brig-builtins.def: Added a builtin for class_f64. + * builtin-types.def: Added a builtin type needed by class_f64. + 2017-05-03 Jason Merrill * timevar.def: Add TV_CONSTEXPR. @@ -71,6 +76,7 @@ * ipa-inline.h (inline_summary): Add ctor. (create_ggc): Do not use ggc_cleared_alloc. +>>> .r247575 ^--- I removed this in the next commit. 2017-05-03 Jeff Downs Rainer Orth Index: gcc/builtin-types.def === --- gcc/builtin-types.def (revision 247575) +++ gcc/builtin-types.def (revision 247576) @@ -348,6 +348,8 @@ BT_INT, BT_INT, BT_INT) DEF_FUNCTION_TYPE_2 (BT_FN_UINT_FLOAT_UINT, BT_UINT, BT_FLOAT, BT_UINT) +DEF_FUNCTION_TYPE_2 (BT_FN_UINT_DOUBLE_UINT, + BT_UINT, BT_DOUBLE, BT_UINT) DEF_FUNCTION_TYPE_2 (BT_FN_FLOAT_UINT_UINT, BT_FLOAT, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_2 (BT_FN_ULONG_UINT_UINT, Index: gcc/brig-builtins.def === --- gcc/brig-builtins.def (revision 247575) +++ gcc/brig-builtins.def (revision 247576) @@ -222,6 +222,10 @@ BRIG_TYPE_F32, "__hsail_class_f32", BT_FN_UINT_FLOAT_UINT, ATTR_PURE_NOTHROW_LEAF_LIST) +DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CLASS_F64, BRIG_OPCODE_CLASS, + BRIG_TYPE_F64, "__hsail_class_f64", BT_FN_UINT_DOUBLE_UINT, + ATTR_PURE_NOTHROW_LEAF_LIST) + DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CLASS_F32_F16, BRIG_OPCODE_CLASS, BRIG_TYPE_F16, "__hsail_class_f32_f16", BT_FN_UINT_FLOAT_UINT, ATTR_PURE_NOTHROW_LEAF_LIST) Index: libhsail-rt/rt/workitems.c === --- libhsail-rt/rt/workitems.c (revision 247575) +++ libhsail-rt/rt/workitems.c (revision 247576) @@ -63,10 +63,6 @@ #define FIBER_STACK_SIZE (64*1024) #define GROUP_SEGMENT_ALIGN 256 -/* HSA requires WGs to be executed in flat work-group id order. Enabling - the following macro can reveal test cases that rely on the ordering, - but is not useful for much else. */ - uint32_t __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context); uint32_t __hsail_workitemid (uint32_t dim, PHSAWorkItem *context); Index: libhsail-rt/rt/arithmetic.c === --- libhsail-rt/rt/arithmetic.c (revision 247575) +++ libhsail-rt/rt/arithmetic.c (revision 247576) @@ -424,18 +424,34 @@ uint32_t __hsail_class_f32 (float a, uint32_t flags) { - return (flags & 0x0001 && isnan (a) && !(*(uint32_t *)