[PATCH] S/390: Implement vectory copysign
Hi, this patch implements vector copysign using vector select on S/390. Regtested and bootstrapped on s390x. Regards Robin -- gcc/ChangeLog: 2019-02-07 Robin Dapp * config/s390/vector.md: Implement vector copysign. gcc/testsuite/ChangeLog: 2019-02-07 Robin Dapp * gcc.target/s390/vector/vec-copysign-execute.c: New test. * gcc.target/s390/vector/vec-copysign.c: New test. diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index c9ffab4c8c2..820372eca29 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -1362,6 +1362,31 @@ operands[4] = CONST0_RTX (V2DImode); }) +; Vector copysign, implement using vector select +(define_expand "copysign3" + [(set (match_operand:VFT 0 "register_operand" "") + (if_then_else:VFT + (eq (match_dup 3) + (match_dup 4)) + (match_operand:VFT 1 "register_operand" "") + (match_operand:VFT 2 "register_operand" "")))] + "TARGET_VX" +{ + int sz = GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + int prec = GET_MODE_PRECISION (GET_MODE_INNER (mode)); + wide_int mask_val = wi::shwi (1l << (sz - 1), prec); + + rtx mask = gen_reg_rtx (mode); + + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + for (int i = 0; i < nunits; i++) +RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ()); + + mask = gen_rtx_CONST_VECTOR (mode, v); + operands[3] = force_reg (mode, mask); + operands[4] = CONST0_RTX (mode); +}) ;; ;; Integer compares diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c new file mode 100644 index 000..a8d675d3a72 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mzarch -march=z13" } */ + +#include +#include + +#define N 20 + +double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9, +27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0, +-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199}; +double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0, +7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3}; +double r[N]; +double r2[N]; + +void +foo (void) +{ + for (int i = 0; i < N; i++) +r[i] = copysign (a[i], b[i]); +} + +__attribute__((optimize("no-tree-vectorize"))) +void +check (void) +{ + for (int i = 0; i < N; i++) +{ + r2[i] = copysign (a[i], b[i]); + assert (r[i] == r2[i] + || (isnan (r[i]) && isnan (r2[i]) + && signbit (r[i]) == signbit (r2[i]))); +} +} + +float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9, +27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0, +-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199}; +float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0, +7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3}; +float rf[N]; +float rf2[N]; + +__attribute__ ((__target__ ("arch=z14"))) +void +foof (void) +{ + for (int i = 0; i < N; i++) +rf[i] = copysignf (af[i], bf[i]); +} + +__attribute__((optimize("no-tree-vectorize"))) +void +checkf (void) +{ + for (int i = 0; i < N; i++) +{ + rf2[i] = copysignf (af[i], bf[i]); + assert (rf[i] == rf2[i] + || (isnan (rf[i]) && isnan (rf2[i]) + && signbit (rf[i]) == signbit (rf2[i]))); +} +} + +int main() +{ + foo (); + check (); + + foof (); + checkf (); + return r[0]; +} diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c new file mode 100644 index 000..64c6970c23e --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target { s390*-*-* } } } */ +/* { dg-options "-O2 -ftree-vectorize -mzarch" } */ +/* { dg-final { scan-assembler-times "vgmg" 1 } } */ +/* { dg-final { scan-assembler-times "vgmf" 1 } } */ +/* { dg-final { scan-assembler-times "vsel" 2 } } */ + +#include + +#define N 20 + +double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9, +27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0, +-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199}; +double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0, +7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3}; +double r[N]; +float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9, +27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0, +-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199}; +float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0, +7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3}; +float rf[N]; + +__attribute__ ((__target__ ("arch=z13"))) +void +foo (void) +{ + for (int i = 0; i < N; i++) +r[i] = copysign (a[i], b[i]); +} + +__attribute__ ((__target__ ("arch=z14"))) +void +foof (void) +{ + for (int i = 0; i < N;
[PATCH][arm] Use neon_dot_q type for 128-bit V[US]DOT instructions where appropriate
Hi all, For the Dot Product instructions we have the scheduling types neon_dot and neon_dot_q for the 128-bit versions. It seems that we're only using the former though, not assigning the neon_dot_q type anywhere. This patch fixes that by adding the mode attribute suffix to the type, similar to how we do it for other types in neon.md. Bootstrapped and tested on arm-none-linux-gnueabihf. Committing to trunk. Thanks, Kyrill 2019-07-02 Kyrylo Tkachov * config/arm/neon.md (neon_dot): Use neon_dot for type. (neon_dot_lane): Likewise. diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index f9d7ba35b137fed383f84eecbe81dd942943d216..4a2c7b99881e96fbff30a53d370ff0df1416c124 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3542,7 +3542,7 @@ (define_insn "neon_dot" DOTPROD)))] "TARGET_DOTPROD" "vdot.\\t%0, %2, %3" - [(set_attr "type" "neon_dot")] + [(set_attr "type" "neon_dot")] ) ;; These instructions map to the __builtins for the Dot Product @@ -3561,7 +3561,7 @@ (define_insn "neon_dot_lane" = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); return "vdot.\\t%0, %2, %P3[%c4]"; } - [(set_attr "type" "neon_dot")] + [(set_attr "type" "neon_dot")] ) ;; These expands map to the Dot Product optab the vectorizer checks for.
Re: [REVISED PATCH 5/9]: C++ P0482R5 char8_t: Standard library support
On 23/12/18 21:27 -0500, Tom Honermann wrote: Attached is a revised patch that addresses changes in P0482R6. Changes from the prior patch include: - Updated the value of the __cpp_char8_t feature test macro to 201811. Tested on x86_64-linux. Thanks, Tom, this is great work! The front-end changes for char8_t went in recently, and I'm finally ready to commit the library parts. There's one big problem I found in this patch, which is that the new numeric_limits specialization uses constexpr unconditionally. That fails if is compiled using options like -std=c++98 -fno-char8_t because the specialization will be used, but the constexpr keyword isn't allowed. That's easily fixed by replacing the keyword with _GLIBCXX_CONSTEXPR. The other way to solve that problem would be for the compiler to give an error if -fchar8_t is used with C++98, but I see no fundamental reason that combination of options shouldn't be allowed. We can support it in the library by using the macro. As discussed in San Diego, the other change needed is to add the abi_tag attribute to the new versions of path::u8string and path::generic_u8string, so that the mangling is different when its return type is different: #ifdef _GLIBCXX_USE_CHAR8_T __attribute__((__abi_tag__("__u8"))) std::u8string u8string() const; #else std::stringu8string() const; #endif // _GLIBCXX_USE_CHAR8_T Otherwise we get ODR violations when linking objects compiled with -fchar8_t enabled to objects with it disabled (e.g. linking -std=c++17 objects to -std=c++2a objects, which needs to work). I suggest "__u8" as the name of the ABI tag, but I'm open to other suggestions. "__char8_t" is a bit long and verbose. "__cxx20" would be consistent with "__cxx11" used for the new ABI introduced in GCC 5 but it regularly confuses people who think it is coupled to the -std=c++11 option (and so don't understand why they still see it for -std=c++14). Also, I see that you've made changes to (to add the experimental::u8string_view typedef) and to std::experimental::path (to change the return type of u8string and generic_u8string). The former change is fairly harmless; it only adds a typedef, albeit one which is not a reserved name in C++14/C++17 and so should be available for users to define as a macro. Maybe prior to C++2a we should only define it when GNU extensions are enabled (i.e. when using -std=gnu++14 not -std=c++14): #if defined _GLIBCXX_USE_CHAR8_T \ && (__cplusplus > 201703L || !defined __STRICT_ANSI__) using u8string_view = basic_string_view; #endif Changing the return type of experimental::path members concerns me more. That's a published TS which is not going to be revised, and it's not obvious to me that users would want the change in semantics. If somebody is still using the Filesystem TS in C++2a code, they're probably not expecting it to change. If they need to update their code for C++2a they might as well just use std::filesystem, and so having char8_t support in std::experimental::filesystem isn't clearly useful.
Re: [PATCH] correct __clear_cache signature
Hi Martin, On Wed, Feb 06, 2019 at 05:28:08PM -0700, Martin Sebor wrote: > void > -__clear_cache (char *beg __attribute__((__unused__)), > -char *end __attribute__((__unused__))) > +__clear_cache (void *beg __attribute__((__unused__)), > +void *end __attribute__((__unused__))) > { > #ifdef CLEAR_INSN_CACHE > - CLEAR_INSN_CACHE (beg, end); > + /* Cast the void* pointers to char* as some implementations > + of the macro assume the pointers can be subtracted from > + one another. */ > + CLEAR_INSN_CACHE ((char *) beg, (char *) end); > #endif /* CLEAR_INSN_CACHE */ > } You can subtract pointers to void in GCC just fine, it's a GNU C extension. See extend.texi: In GNU C, addition and subtraction operations are supported on pointers to @code{void} and on pointers to functions. This is done by treating the size of a @code{void} or of a function as 1. (and libgcc is built with GCC always). Segher
Re: [PATCH][AArch64] Change representation of SABD in RTL
Hi James, On 06/02/19 17:33, James Greenhalgh wrote: On Mon, Feb 04, 2019 at 04:23:32AM -0600, Kyrill Tkachov wrote: Hi all, Richard raised a concern about the RTL we use to represent the AdvSIMD SABD (vector signed absolute difference) instruction. We currently represent it as ABS (MINUS op1 op2). This isn't exactly what SABD does. ABS treats its input as a signed value and returns the absolute of that. For example: (sabd:QI 64 -128) == 192 (unsigned) aka -64 (signed) whereas (minus:QI 64 -128) == 192 (unsigned) aka -64 (signed), (abs ...) of that is 64. A better way to describe the instruction is with MINUS (SMAX (op1 op2) SMIN (op1 op2)). This patch implements that, and also implements similar semantics for the UABD instruction that uses UMAX and UMIN. That way for the example above we'll have: (minus:QI (smax:QI (64 -128)) (smin:QI (64 -128))) == (minus:QI 64 -128) == 192 (or -64 signed) which matches what SABD does. Bootstrapped and tested on aarch64-none-linux-gnu. Ok for trunk? Not without a comment explaining the above subtlety and preferably a testcase which would fail today on trunk. Otherwise, OK. I'll take that as approval with the changes requested. I'll commit the updated patch (attached) later today. Thanks, Kyrill 2019-02-07 Kyrylo Tkachov * config/aarch64/iterators.md (max_opp): New code_attr. (USMAX): New code iterator. * config/aarch64/predicates.md (aarch64_smin): New predicate. (aarch64_smax): Likewise. * config/aarch64/aarch64-simd.md (abd_3): Rename to... (*aarch64_abd_3): ... Change RTL representation to MINUS (MAX MIN). 2019-02-07 Kyrylo Tkachov * gcc.target/aarch64/abd_1.c: New test. * gcc.dg/sabd_1.c: Likewise. James Thanks, Kyrill 2019-04-02 Kyrylo Tkachov * config/aarch64/iterators.md (max_opp): New code_attr. (USMAX): New code iterator. * config/aarch64/predicates.md (aarch64_smin): New predicate. (aarch64_smax): Likewise. * config/aarch64/aarch64-simd.md (abd_3): Rename to... (*aarch64_abd_3): ... Change RTL representation to MINUS (MAX MIN). 2019-04-02 Kyrylo Tkachov * gcc.target/aarch64/abd_1.c: New test. diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 96360d877a404a6795aef5458345e87de9006cf4..eb99d3ab881e29f3069991e4f778be95d51ec4da 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -705,13 +705,22 @@ (define_insn "aarch64_abs" [(set_attr "type" "neon_abs")] ) -(define_insn "abd_3" +;; It's tempting to represent SABD as ABS (MINUS op1 op2). +;; This isn't accurate as ABS treats always its input as a signed value. +;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64. +;; Whereas SABD would return 192 (-64 signed) on the above example. +;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead. +(define_insn "*aarch64_abd_3" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (abs:VDQ_BHSI (minus:VDQ_BHSI - (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w"] - "TARGET_SIMD" - "sabd\t%0., %1., %2." + (minus:VDQ_BHSI + (USMAX:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w")) + (match_operator 3 "aarch64_" + [(match_dup 1) + (match_dup 2)])))] + "TARGET_SIMD" + "abd\t%0., %1., %2." [(set_attr "type" "neon_abd")] ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index e9ede2934a43539279ae68049fb66b84114d90ea..16e4dbda73ab928054590c47a4398408162c0332 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1056,6 +1056,9 @@ (define_mode_attr f16quad [(V2SF "") (V4SF "q")]) (define_code_attr f16mac [(plus "a") (minus "s")]) +;; Map smax to smin and umax to umin. +(define_code_attr max_opp [(smax "smin") (umax "umin")]) + ;; The number of subvectors in an SVE_STRUCT. (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2") (VNx8SI "2") (VNx4DI "2") @@ -1204,6 +1207,9 @@ (define_code_iterator MAXMIN [smax smin umax umin]) (define_code_iterator FMAXMIN [smax smin]) +;; Signed and unsigned max operations. +(define_code_iterator USMAX [smax umax]) + ;; Code iterator for variants of vector max and min. (define_code_iterator ADDSUB [plus minus]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 855cf7b52f840a81e0bbd2b31fd59fc860060626..b8e6d232ff6237a58addda1ec0e953a1054dc616 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -319,6 +319,12 @@ (define_predicate "aarch64_reg_or_imm" (ior (match_operand 0 "register_operand") (match_operand 0 "const_scalar_int_operand"))) +(define_predicate "aarch64_smin" + (match_code "smin")) + +(define_predicate "aarch64_umin" + (match_code "umin")) + ;; True for integer
Re: [REVISED PATCH 7/9]: C++ P0482R5 char8_t: New standard library tests
On 23/12/18 21:27 -0500, Tom Honermann wrote: Attached is a revised patch that addresses changes in P0482R6. Changes from the prior patch include: - Updated the value of the __cpp_char8_t feature test macro to 201811. Tested on x86_64-linux. There are quite a few additional changes needed to make the testsuite pass cleanly with non-default options, e.g. when running it with RUNTESTFLAGS=--target_board=unix/-fchar8_t/-fno-inline I see these failures: FAIL: 21_strings/basic_string/literals/types.cc (test for excess errors) FAIL: 21_strings/basic_string/literals/values.cc (test for excess errors) UNRESOLVED: 21_strings/basic_string/literals/values.cc compilation failed to produce executable FAIL: 21_strings/basic_string_view/literals/types.cc (test for excess errors) FAIL: 21_strings/basic_string_view/literals/values.cc (test for excess errors) UNRESOLVED: 21_strings/basic_string_view/literals/values.cc compilation failed to produce executable FAIL: 22_locale/codecvt/char16_t.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/char16_t.cc compilation failed to produce executable FAIL: 22_locale/codecvt/char32_t.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/char32_t.cc compilation failed to produce executable FAIL: 22_locale/codecvt/codecvt_utf8/79980.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/codecvt_utf8/79980.cc compilation failed to produce executable FAIL: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc compilation failed to produce executable FAIL: 22_locale/codecvt/utf8.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/utf8.cc compilation failed to produce executable FAIL: 22_locale/conversions/string/2.cc (test for excess errors) UNRESOLVED: 22_locale/conversions/string/2.cc compilation failed to produce executable FAIL: 22_locale/conversions/string/3.cc (test for excess errors) UNRESOLVED: 22_locale/conversions/string/3.cc compilation failed to produce executable FAIL: experimental/string_view/literals/types.cc (test for excess errors) FAIL: experimental/string_view/literals/values.cc (test for excess errors) UNRESOLVED: experimental/string_view/literals/values.cc compilation failed to produce executable There would be similar errors running all the tests with -std=c++2a, which is definitely something I do often and so want the tests to be clean. We can either disable those tests when char8_t is enabled (because we already have alternative tests checking the char8_t versions of string_view etc.) or make them work either way, which the attached patch begins doing (more changes are needed). I expect a different set of failures for -fno-char8_t (which is probably a less important case to support that enabling char8_t in older standards, but maybe still worth testing now and then). commit fb345c6806bd4f1efdb5937e083076e79f2086dd Author: Jonathan Wakely Date: Thu Feb 7 09:04:11 2019 + fixup tests diff --git a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc index e83cf98e4c7..2e97b4f3ab0 100644 --- a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc +++ b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc @@ -40,14 +40,14 @@ void test01() typedef str_conv sc; const sc::byte_string berr = "invalid wide string"; - const sc::wide_string werr = u8"invalid byte string"; + const sc::wide_string werr = "invalid byte string"; sc c(berr, werr); string input = "Stop"; input += char(0xFF); string woutput = c.from_bytes(input); VERIFY( input == woutput ); // noconv case doesn't detect invalid input - string winput = u8"Stop"; + string winput = "Stop"; winput += char(0xFF); string output = c.to_bytes(winput); VERIFY( winput == output ); // noconv case doesn't detect invalid input diff --git a/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc b/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc index 144c8582a41..ac03f362411 100644 --- a/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc +++ b/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc @@ -31,9 +31,18 @@ # error "ATOMIC_CHAR_LOCK_FREE must be 1 or 2" #endif +#ifdef _GLIBCXX_USE_CHAR8_T +# ifndef ATOMIC_CHAR8_T_LOCK_FREE +# error "ATOMIC_CHAR8_T_LOCK_FREE must be a macro" +# elif ATOMIC_CHAR8_T_LOCK_FREE != 1 && ATOMIC_CHAR8_T_LOCK_FREE != 2 +# error "ATOMIC_CHAR8_T_LOCK_FREE must be 1 or 2" +# endif +#endif + #ifndef ATOMIC_CHAR16_T_LOCK_FREE # error "ATOMIC_CHAR16_T_LOCK_FREE must be a macro" #elif ATOMIC_CHAR16_T_LOCK_FREE != 1 && ATOMIC_CHAR16_T_LOCK_FREE != 2 +# error "ATOMIC_CHAR16_T_LOCK_FREE must be 1 or 2" #endif #ifndef ATOMIC_CHAR32_T_LOCK_FREE diff --git a/libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc b/libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc index c53bb24fa49..47084c4a33a 100644 --- a/libst
[PATCH] S/390: Introduce jdd constraint
Bootstrapped and regtested on s390x-redhat-linux. Implementation of section anchors in S/390 back-end added in r266741 broke jump labels in S/390 Linux kernel [1]. Currently jump labels pass global variable addresses to .quad directive in inline assembly using "X" constraint. In the past this used to produce regular symbol references, however, after r266741 we sometimes get values like (plus (reg) (const_int)), where (reg) points to a section anchor. Strictly speaking, this is still correct, since "X" accepts anything. Thus, now we need another way to support jump labels. The existing "i" constraint cannot be used, since with -fPIC it must not accept non-local symbols, however, jump labels do require that, e.g. __tracepoint_xdp_exception from kernel proper might be referenced from kernel modules. The existing "ZL" constraint cannot be used for the same reason. The existing "b" constraint cannot be used because of the way expand_asm_stmt works. It deduces whether the constraint allows regs, subregs or mems, and processes asm operands differently based on that. "b" is supposed to accept values like (mem (symbol_ref)), and there appears to be no way to explain to expand_asm_stmt that for "b" mem's address must not be in a register. This patch introduces the new machine-specific constraint named "jdd" - "j" prefix is already used for constants, and "d" stands for "data". It accepts anything that fits into the data section, whether or not this might require a relocation, that is, anything that passes CONSTANT_P check. [1] https://lkml.org/lkml/2019/1/23/346 gcc/ChangeLog: 2019-02-06 Ilya Leoshkevich * config/s390/constraints.md (jdd): New constraint. gcc/testsuite/ChangeLog: 2019-02-06 Ilya Leoshkevich * gcc.target/s390/jump-label.c: New test. --- gcc/config/s390/constraints.md | 17 + gcc/testsuite/gcc.target/s390/jump-label.c | 19 +++ 2 files changed, 36 insertions(+) create mode 100644 gcc/testsuite/gcc.target/s390/jump-label.c diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md index 688dd96e0e2..4055cbc7c68 100644 --- a/gcc/config/s390/constraints.md +++ b/gcc/config/s390/constraints.md @@ -37,6 +37,7 @@ ;; jKK: constant vector with all elements having the same value and ;; matching K constraint ;; jm6: An integer operand with the lowest order 6 bits all ones. +;; jdd: A constant operand that fits into the data section. ;;t -- Access registers 36 and 37. ;;v -- Vector registers v0-v31. ;;C -- A signed 8-bit constant (-128..127) @@ -567,3 +568,19 @@ (define_constraint "ZL" "LARL operand when in 64-bit mode, otherwise nothing." (match_test "TARGET_64BIT && larl_operand (op, VOIDmode)")) + +;; This constraint must behave like "i", in particular, the matching values +;; must never be placed into registers or memory by +;; cfgexpand.c:expand_asm_stmt. It could be straightforward to start its name +;; with a letter from genpreds.c:const_int_constraints, however it would +;; require using (match_code "const_int"), which is infeasible. To achieve the +;; same effect, that is, setting maybe_allows_reg and maybe_allows_mem to false +;; in genpreds.c:add_constraint, we explicitly exclude reg, subreg and mem +;; codes. +(define_constraint "jdd" + "A constant operand that fits into the data section. + Usage of this constraint might produce a relocation." + (and (not (match_code "reg")) + (not (match_code "subreg")) + (not (match_code "mem")) + (match_test "CONSTANT_P (op)"))) diff --git a/gcc/testsuite/gcc.target/s390/jump-label.c b/gcc/testsuite/gcc.target/s390/jump-label.c new file mode 100644 index 000..3de73f6bb6c --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/jump-label.c @@ -0,0 +1,19 @@ +/* Test jdd constraint, which is used for linux kernel jump labels. */ + +/* { dg-do link } */ +/* { dg-options "-O2 -fPIC -shared" } */ + +__attribute__ ((visibility ("default"))) extern int i; + +void f (void) +{ + asm goto (".pushsection foo\n" +#if defined(__s390x__) +".quad %0-.\n" +#else +".long %0-.\n" +#endif +".popsection\n" +: : "jdd" (&i) : : l); +l:; +} -- 2.20.1
Re: [PATCH][wwwdocs][Arm][AArch64] Update changes with new features and flags.
On Wed, 6 Feb 2019, Tamar Christina wrote: > I've updated the patch with your suggested changes and have grouped > the Arm and AArch64 targets a bit. Thanks, Tamar! > Ok for commit? Yes, I had meant to imply this in my original review. :-) Only note is that this seems to hop from to , skipping ? Gerald
RE: [PATCH][wwwdocs][Arm][AArch64] Update changes with new features and flags.
Hi Gerard, > > On Wed, 6 Feb 2019, Tamar Christina wrote: > > I've updated the patch with your suggested changes and have grouped > > the Arm and AArch64 targets a bit. > > Thanks, Tamar! > > > Ok for commit? > > Yes, I had meant to imply this in my original review. :-) > > Only note is that this seems to hop from to , skipping ? Yeah, the h4 heading was still a bit big, so visually it didn't look quite like a "child" entry. So I skipped on level. Cheers, Tamar > > Gerald
RE: [PATCH][GCC][AArch64] Have empty HWCAPs string ignored during native feature detection
Hi All, Since this hasn't been reviewed yet anyway I've updated this patch to also fix the memory leaks etc. -- This patch makes the feature detection code for AArch64 GCC not add features automatically when the feature had no hwcaps string to match against. This means that -mcpu=native no longer adds feature flags such as +profile. The behavior wasn't noticed before because at the time +profile was added a bug was preventing any feature bits from being added by native detections. The loop has also been changed as Jakub specified in order to avoid a memory leak that was present in the existing code and to be slightly more efficient. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for trunk? Thanks, Tamar gcc/ChangeLog: 2019-02-07 Tamar Christina PR target/88530 * config/aarch64/aarch64-option-extensions.def: Document it. * config/aarch64/driver-aarch64.c (host_detect_local_cpu): Skip feature if empty hwcaps. gcc/testsuite/ChangeLog: 2019-02-07 Tamar Christina PR target/88530 * gcc.target/aarch64/options_set_10.c: New test. > -Original Message- > From: gcc-patches-ow...@gcc.gnu.org > On Behalf Of Tamar Christina > Sent: Wednesday, January 30, 2019 14:48 > To: Jakub Jelinek > Cc: Kyrill Tkachov ; gcc-patches@gcc.gnu.org; > nd ; James Greenhalgh ; > Richard Earnshaw ; Marcus Shawcroft > > Subject: Re: [PATCH][GCC][AArch64] Have empty HWCAPs string ignored > during native feature detection > > Hi Jakub, > > On Wed, Jan 30, 2019 at 02:06:01PM +, Tamar Christina wrote: > > > Thanks for the feedback, but I think those are changes for another patch. > > > > At least the memory leak is something that should be fixed even in > > stage4 IMNSHO. > > I'll provide a separate patch for this then. > > > Anyway, will defer to aarch64 maintainers here. > > > Just one question, for the *feat_string == '\0' case, is continue what > > you want, rather than just enabled = false; and doing the > > extension_flags &= ~(aarch64_extensions[i].flag); > > later on? > > Yeah, because the feature may be on by default due to another extension, in > which case you would erroneously turn it off. The absence of an HWCAPS > shouldn't pro-actively disable an extension. > > Regards, > Tamar diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 50909debf5455d57b86db91a0a5539fed1d5b91e..a6b3ae2b73f285e62e0f24c92bc5efefc9ffb59c 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -43,7 +43,8 @@ the extension (for example, the 'crypto' extension depends on four entries: aes, pmull, sha1, sha2 being present). In that case this field should contain a space (" ") separated list of the strings in 'Features' - that are required. Their order is not important. */ + that are required. Their order is not important. An empty string means + do not detect this feature during auto detection. */ /* Enabling "fp" just enables "fp". Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c index 05f1360d48583473820c8008cc09ed139bddc0ce..9515061ec985cc374c7c510c6954a6a3c240814f 100644 --- a/gcc/config/aarch64/driver-aarch64.c +++ b/gcc/config/aarch64/driver-aarch64.c @@ -250,27 +250,35 @@ host_detect_local_cpu (int argc, const char **argv) { for (i = 0; i < num_exts; i++) { - char *p = NULL; - char *feat_string - = concat (aarch64_extensions[i].feat_string, NULL); + const char *p = aarch64_extensions[i].feat_string; + + /* If the feature contains no HWCAPS string then ignore it for the + auto detection. */ + if (*p == '\0') + continue; + bool enabled = true; /* This may be a multi-token feature string. We need - to match all parts, which could be in any order. - If this isn't a multi-token feature string, strtok is - just going to return a pointer to feat_string. */ - p = strtok (feat_string, " "); - while (p != NULL) + to match all parts, which could be in any order. */ + size_t len = strlen (buf); + do { - if (strstr (buf, p) == NULL) + const char *end = strchr (p, ' '); + if (end == NULL) + end = strchr (p, '\0'); + if (memmem (buf, len, p, end - p) == NULL) { /* Failed to match this token. Turn off the features we'd otherwise enable. */ enabled = false; break; } - p = strtok (NULL, " "); + if (*end == '\0') + break; + p = end + 1; } + while (1); if (enabled) extension_flags |= aarch64_extensions[i].flag; @@ -360,12 +368,12 @@ host_detect_local_cpu (int argc, const char **argv) not_found: { /* If detection fails we ignore the option. - Clean up and return empty string. */ +
[committed][PATCH][GCC][AArch64] Fix initializer for array so it's a C initializer instead of C++
Hi All, This fixes a missing = that would cause the array initializer to be a C++ initializer instead of a C one, causing a warning when building with pre-C++11 standards compiler. Committed under the GCC obvious rules. gcc/ChangeLog: 2019-02-07 Tamar Christina * config/aarch64/aarch64-builtins.c (aarch64_fcmla_lane_builtin_data): Make it a C initializer. -- diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index c8f5a555f6724433dc6cea1cff3547c0c66c54a7..d7b1b7bd6867a0f98a2f67fec0fd80a0a08f69c1 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -455,7 +455,7 @@ static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { /* This structure contains how to manage the mapping form the builtin to the instruction to generate in the backend and how to invoke the instruction. */ -static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] { +static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { AARCH64_SIMD_FCMLA_LANEQ_BUILTINS };
Re: [rs6000] 64-bit integer loads/stores and FP instructions
Hi! On Wed, Feb 06, 2019 at 11:08:44PM +0100, Eric Botcazou wrote: > as reported e.g. at https://gcc.gnu.org/ml/gcc-help/2018-11/msg00038.html, > the > 7 series of compilers started to use FP instructions for simple 64-bit > integer > loads/stores in unexpected ways. Consider: > The difference between GCC 7 and GCC 8 comes from this change: > https://gcc.gnu.org/ml/gcc-patches/2018-06/msg00584.html > which seems to imply that the first change was problematic but which was not > backported to the 7 branch. > > So at a minimum I think that the second change should be backported to the 7 > branch; it applies (almost) cleanly and gives no regressions on PowerPC/Linux. > > Backport from mainline > 2018-06-11 Segher Boessenkool > > PR target/85755 > * config/rs6000/rs6000.md (*movdi_internal32): Put constraint modifiers > on the correct operand. > (*movdi_internal64): Ditto. Backporting this is okay. (It was not done because it does not affect correctness). What is the "almost", btw? > But I also think that another part of the first change is problematic, namely > the removal of the '*' modifier on the alternatives, which means that the > register preference of the instruction isn't skewed towards integer registers > any more. (In https://gcc.gnu.org/ml/gcc-patches/2016-11/txt4j9hLRLCzf.txt and you presumably mean the *movdi_internal32 hunk, and the first line of it: - "=Y,r, r, ?m,?*d,?*d, + "=Y,r, r, ^m,^d, ^d, where the last three are mem<-reg, reg<-mem, reg<-reg, for fp regs). This patch was a year before we switched to LRA, and for non-LRA ports * does not do any register preferencing. > That may be OK for native targets, but that is frowned upon for > embedded targets, where people are really unhappy when the compiler emits > floating-point instructions for pure integer code for no apparent reason. For 6xx/7xx people *wanted* to use FP loads and stores whenever possible, because you get twice the bandwidth with them. If you do not want the FP registers used (or FP insns used), use -msoft-float, that's what it's for. Patches are welcome, but complicating this already very complex code by introducing an arbitrary distinction between "embedded and other" (or actually, "server and other") isn't the way to go. Sorry. Could you open a PR please? Segher
Re: [PATCH] Integration of parallel standard algorithms for c++17
On 31/01/19 21:08 -0800, Thomas Rodgers wrote: Update C++17 parallel algorithms to LLVM/MIT licensed upstream sources Some lines in bits/c++config.h need to be split before 80 columns (with a backslash if the split is in the middle of a preprocessor condition obviously). There are loads of very long lines in the actual PSTL headers too, but I think we need to keep them similar to the upstream headers to aid merging, so can't gratuitously reformat them. I'm assuming that doesn't apply to since that's our header and any changes will need to be manually applied anyway, right? We'll need to add a copy of the LICENSE.TXT file to our sources, since it's referred to by the comments at the top of each PSTL file. Typo in include/Makefile.am: ${pstl_srcdir}/memoy_impl.h (Which will require regenerating include/Makefile.in). The __cpp_lib_parallel_algorithm macro needs to be (conditionally) defined in as well as and . The namespaces par_backend and unseq_backend need to be uglified. ALL the names in the __pstl::internal namespace need to be uglified: brick_any_of pattern_any_of for_each_n_it_serial brick_walk1 pattern_walk1 etc. I see quite a few calls to functions that should probably be qualified to prevent ADL, but that can be fixed later (and upstream first): return brick_count(... return except_handler([&]() { ... The DejaGnu directives in the pstl tests need to be in this order: // { dg-options "-std=gnu++17 -ltbb" } // { dg-do run { target c++17 } } // { dg-require-effective-target tbb-backend } Currently the dg-require-effective-target comes before the dg-do line, so doesn't work. DejaGnu processes the lines in order. When it sees the dg-do for target c++17 it overrides the effect of any earlier dg-require-effective-target. I fixed that locally like so: find testsuite/20_util/specialized_algorithms/pstl/ testsuite/25_algorithms/pstl/ testsuite/26_numerics/pstl/ -name \*.cc | xargs sed -i '/dg-require-eff/{h;d};/dg-do/{p;x}' And now I don't get FAIL results on a system with no TBB installed (which is great). I also get no FAIL results on a system with TBB installed (also great). There are two copies (with slight differences) of each of the uninitialized_xxx tests: libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_construct.cc | 128 +++ libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_copy_move.cc | 143 libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_fill_destroy.cc | 103 ++ libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_construct.cc | 132 libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_copy_move.cc | 154 + libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_fill_destroy.cc | 104 ++ That's all for now, but I'll keep making passes over it, going into more detail ...
Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes
>> > > Please add the PR marker to the testsuite ChangeLog as well. > I've been following this PR a bit from the sidelines, I believe a > substantial amount of code > (and one of the testcases) was written by Jakub, so please add him to > the ChangeLog entries as well. > > This looks ok to me. > Thanks for fixing this and thanks Jakub for the analysis and fixes too! > > Kyrill > Thanks Kyrill, I've committed with the changelog below. gcc/ChangeLog: 2019-02-07 Matthew Malcomson Jakub Jelinek PR bootstrap/88714 * config/arm/arm-protos.h (valid_operands_ldrd_strd, arm_count_ldrdstrd_insns): New declarations. * config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of MINUS. (valid_operands_ldrd_strd): New function. (arm_count_ldrdstrd_insns): New function. * config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode sets instead of single DImode set and define new insns to match this. gcc/testsuite/ChangeLog: 2019-02-07 Matthew Malcomson Jakub Jelinek PR bootstrap/88714 * gcc.c-torture/execute/pr88714.c: New test. * gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
Backports to 8.x branch
Hi! Another month have passed since my last 8.x backporting effort, thus I've backported following 32 patches from trunk to 8.x, bootstrapped/regtested on x86_64-linux and i686-linux and committed. Jakub 2019-02-07 Jakub Jelinek Backported from mainline 2019-01-07 Jakub Jelinek PR debug/88723 * dwarf2out.c (const_ok_for_output_1): Remove redundant call to const_not_ok_for_debug_p target hook. (mem_loc_descriptor) : Only call const_ok_for_output_1 on UNSPEC and subexpressions thereof if all subexpressions of the UNSPEC are CONSTANT_P. 2019-01-05 Jakub Jelinek PR debug/88635 * dwarf2out.c (const_ok_for_output_1): Reject MINUS that contains SYMBOL_REF, CODE_LABEL or UNSPEC in subexpressions of second argument. Reject PLUS that contains SYMBOL_REF, CODE_LABEL or UNSPEC in subexpressions of both operands. (mem_loc_descriptor): Handle UNSPEC if target hook acks it and all the subrtxes are CONSTANT_P. * gcc.dg/debug/dwarf2/pr88635.c: New test. --- gcc/dwarf2out.c (revision 267593) +++ gcc/dwarf2out.c (revision 267594) @@ -14401,6 +14401,10 @@ expansion_failed (tree expr, rtx rtl, ch } } +/* True if handling a former CONST by mem_loc_descriptor piecewise. */ + +static bool in_const_p; + /* Helper function for const_ok_for_output. */ static bool @@ -14423,6 +14427,7 @@ const_ok_for_output_1 (rtx rtl) one in a constant pool entry, so testing SYMBOL_REF_TLS_MODEL rather than DECL_THREAD_LOCAL_P is not just an optimization. */ if (flag_checking + && !in_const_p && (XVECLEN (rtl, 0) == 0 || GET_CODE (XVECEXP (rtl, 0, 0)) != SYMBOL_REF || SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0)) == TLS_MODEL_NONE)) @@ -14446,13 +14451,6 @@ const_ok_for_output_1 (rtx rtl) if (CONST_POLY_INT_P (rtl)) return false; - if (targetm.const_not_ok_for_debug_p (rtl)) -{ - expansion_failed (NULL_TREE, rtl, - "Expression rejected for debug by the backend.\n"); - return false; -} - /* FIXME: Refer to PR60655. It is possible for simplification of rtl expressions in var tracking to produce such expressions. We should really identify / validate expressions @@ -14465,6 +14463,41 @@ const_ok_for_output_1 (rtx rtl) case NOT: case NEG: return false; +case PLUS: + { + /* Make sure SYMBOL_REFs/UNSPECs are at most in one of the + operands. */ + subrtx_var_iterator::array_type array; + bool first = false; + FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 0), ALL) + if (SYMBOL_REF_P (*iter) + || LABEL_P (*iter) + || GET_CODE (*iter) == UNSPEC) + { + first = true; + break; + } + if (!first) + return true; + FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 1), ALL) + if (SYMBOL_REF_P (*iter) + || LABEL_P (*iter) + || GET_CODE (*iter) == UNSPEC) + return false; + return true; + } +case MINUS: + { + /* Disallow negation of SYMBOL_REFs or UNSPECs when they + appear in the second operand of MINUS. */ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 1), ALL) + if (SYMBOL_REF_P (*iter) + || LABEL_P (*iter) + || GET_CODE (*iter) == UNSPEC) + return false; + return true; + } default: return true; } @@ -15608,6 +15641,7 @@ mem_loc_descriptor (rtx rtl, machine_mod pool. */ case CONST: case SYMBOL_REF: +case UNSPEC: if (!is_a (mode, &int_mode) || (GET_MODE_SIZE (int_mode) > DWARF2_ADDR_SIZE #ifdef POINTERS_EXTEND_UNSIGNED @@ -15615,6 +15649,43 @@ mem_loc_descriptor (rtx rtl, machine_mod #endif )) break; + + if (GET_CODE (rtl) == UNSPEC) + { + /* If delegitimize_address couldn't do anything with the UNSPEC, we +can't express it in the debug info. This can happen e.g. with some +TLS UNSPECs. Allow UNSPECs formerly from CONST that the backend +approves. */ + bool not_ok = false; + + if (!in_const_p) + break; + + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, rtl, ALL) + if (*iter != rtl && !CONSTANT_P (*iter)) + { + not_ok = true; + break; + } + + if (not_ok) + break; + + FOR_EACH_SUBRTX_VAR (iter, array, rtl, ALL) + if (!const_ok_for_output_1 (*iter)) + { + not_ok = true; + break; + } + + if (not_ok) + break; + + rtl = gen_rtx_CONST (GET_MODE (rtl), rtl);
Re: [PATCH] correct __clear_cache signature
On 2/7/19 2:46 AM, Segher Boessenkool wrote: Hi Martin, On Wed, Feb 06, 2019 at 05:28:08PM -0700, Martin Sebor wrote: void -__clear_cache (char *beg __attribute__((__unused__)), - char *end __attribute__((__unused__))) +__clear_cache (void *beg __attribute__((__unused__)), + void *end __attribute__((__unused__))) { #ifdef CLEAR_INSN_CACHE - CLEAR_INSN_CACHE (beg, end); + /* Cast the void* pointers to char* as some implementations + of the macro assume the pointers can be subtracted from + one another. */ + CLEAR_INSN_CACHE ((char *) beg, (char *) end); #endif /* CLEAR_INSN_CACHE */ } You can subtract pointers to void in GCC just fine, it's a GNU C extension. See extend.texi: In GNU C, addition and subtraction operations are supported on pointers to @code{void} and on pointers to functions. This is done by treating the size of a @code{void} or of a function as 1. (and libgcc is built with GCC always). Sure. The macro is a no-op on x86_64 where I tested so I didn't want to take any chances this late in stage 4 and end up breaking someone's builds. Martin
[Committed] S/390: Fix the vec_xl / vec_xst style builtins
This patch fixes several problems with the vec_xl/vec_xst builtins: - vec_xl/vec_xst needs to use the alignment of the scalar memory operand for the vector type reference. This is required to emit the proper vl/vst alignment hints. - vec_xl / vec_xld2 / vec_xlw4 should accept const pointer source operands - vec_xlw4 / vec_xstw4 needs to accept float memory operands Bootstrapped and regression tested on s390x. gcc/ChangeLog: 2019-02-07 Andreas Krebbel * config/s390/s390-builtin-types.def: Add new types. * config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2) (s390_vec_xlw4): Make the memory operand into a const pointer. (s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision float. * config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate a new vector type with the alignment of the scalar memory operand. gcc/testsuite/ChangeLog: 2019-02-07 Andreas Krebbel * gcc.target/s390/zvector/xl-xst-align-1.c: New test. * gcc.target/s390/zvector/xl-xst-align-2.c: New test. --- gcc/config/s390/s390-builtin-types.def | 14 - gcc/config/s390/s390-builtins.def | 65 +++--- gcc/config/s390/s390-c.c | 28 +++--- .../gcc.target/s390/zvector/xl-xst-align-1.c | 45 +++ .../gcc.target/s390/zvector/xl-xst-align-2.c | 48 5 files changed, 161 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def index e749d6d..58d920c 100644 --- a/gcc/config/s390/s390-builtin-types.def +++ b/gcc/config/s390/s390-builtin-types.def @@ -260,6 +260,7 @@ DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR) DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF) DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI) +DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR) DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDPTR, BT_V4SI, BT_INT, BT_VOIDPTR) DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI) DEF_FN_TYPE_2 (BT_FN_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI) @@ -492,6 +493,7 @@ DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, BT_INT) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, BT_BV16QI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI) +DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARCONSTPTR, BT_UV16QI, BT_LONG, BT_UCHARCONSTPTR) DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR) DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR) DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR) @@ -523,6 +525,7 @@ DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_ DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI) DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI) +DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGCONSTPTR) DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR) DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG) DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR) @@ -556,6 +559,8 @@ DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI) +DEF_OV_TYPE (BT_OV_UV4SI_LONG_FLTPTR, BT_UV4SI, BT_LONG, BT_FLTPTR) +DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTCONSTPTR, BT_UV4SI, BT_LONG, BT_UINTCONSTPTR) DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR) DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT) DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR) @@ -593,6 +598,7 @@ DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, BT_INTPTR) DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI) +DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTCONSTPTR, BT_UV8HI, BT_LONG, BT_USHORTCONSTPTR) DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR) DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT) DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR) @@ -626,6 +632,7 @@ DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, BT_V8HI) DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_U
Re: [PATCH] print correct array sizes in errors (PR 87996)
On 2/5/19 4:55 PM, Martin Sebor wrote: On 2/5/19 12:14 PM, Jason Merrill wrote: On 2/5/19 1:46 PM, Martin Sebor wrote: On 2/1/19 7:41 AM, Jason Merrill wrote: On 1/31/19 5:49 PM, Martin Sebor wrote: On 1/30/19 3:15 PM, Jason Merrill wrote: On 1/29/19 7:15 PM, Martin Sebor wrote: + /* Try to convert the original SIZE to a ssizetype. */ + if (orig_size != error_mark_node + && !TYPE_UNSIGNED (TREE_TYPE (orig_size))) + { + if (TREE_CODE (size) == INTEGER_CST + && tree_int_cst_sign_bit (size)) + diagsize = build_converted_constant_expr (ssizetype, size, + tsubst_flags_t ()); + else if (size == error_mark_node + && TREE_CODE (orig_size) == INTEGER_CST + && tree_int_cst_sign_bit (orig_size)) + diagsize = build_converted_constant_expr (ssizetype, orig_size, + tsubst_flags_t ()); + } Using build_converted_constant_expr here looks odd; that's a language-level notion, and we're dealing with compiler internals. fold_convert seems more appropriate. Done. + if (TREE_CONSTANT (size)) + { + if (!diagsize && TREE_CODE (size) == INTEGER_CST) + diagsize = size; + } + else size = osize; } @@ -9732,15 +9758,12 @@ compute_array_index_type_loc (location_t name_loc, if (TREE_CODE (size) == INTEGER_CST) { /* An array must have a positive number of elements. */ - if (!valid_constant_size_p (size)) + if (!diagsize) + diagsize = size; It seems like the earlier hunk here is unnecessary; if size is an INTEGER_CST, it will be unchanged, and so be used for diagsize in the latter hunk without any changes to the earlier location. Actually, why not do all of the diagsize logic down here? There doesn't seem to be anything above that relies on information we will have lost at this point. Sure. Done in the attached revision. - tree osize = size; + /* The original numeric size as seen in the source code after + any substitution and before conversion to size_t. */ + tree origsize = NULL_TREE; Can't you use osize? instantiate_non_dependent_expr doesn't do any actual substitution, it shouldn't change the type of the expression or affect whether it's an INTEGER_CST. I went ahead and reused osize but kept the new name origsize (I assume avoiding introducing a new variable is what you meant). The longer name is more descriptive and also has a comment explaining what it's for (which is why I didn't touch osize initially -- I didn't know enough about what it was for or how it might change). Yep, it was saving the original "size" before conversions. I guess the name change is OK, but please restore the initialization from "size", and drop the added comment on the call to mark_rvalue_use (which is to possibly replace a lambda capture with its constant value). How about passing origsize into valid_array_size_p rather than than always computing diagsize in the caller? I'm not sure I understand what you're asking for here. diagsize is computed from either size or origsize but valid_array_size_p takes just one size (or type) argument. What part do you want to move to valid_array_size_p (and why)? Or are you asking to call fold_convert() in valid_array_size_p instead here? I was thinking of adding an origsize parameter to valid_array_size_p. Jason
[Ada] Fix tasking on SPARC/Linux
The tasking is terminally broken on SPARC/Linux with the mainline compiler. The problem is also visible on other branches if you compile the runtime with assertions enabled. Tested on SPARC64/Linux, applied on all active branches. 2019-02-07 Eric Botcazou * libgnarl/s-linux__sparc.ads (ETIMEDOUT): Set to correct value. -- Eric BotcazouIndex: libgnarl/s-linux__sparc.ads === --- libgnarl/s-linux__sparc.ads (revision 268508) +++ libgnarl/s-linux__sparc.ads (working copy) @@ -70,7 +70,7 @@ package System.Linux is EINVAL: constant := 22; ENOMEM: constant := 12; EPERM : constant := 1; - ETIMEDOUT : constant := 110; + ETIMEDOUT : constant := 60; - -- Signals --
Re: [C++PATCH] [PR86379] do not use TREE_TYPE for USING_DECL_SCOPE
On 2/5/19 8:58 PM, Alexandre Oliva wrote: On Feb 5, 2019, Jason Merrill wrote: On Tue, Feb 5, 2019 at 1:37 AM Alexandre Oliva wrote: On Jan 31, 2019, Jason Merrill wrote: Let's use strip_using_decl instead Aah, nice! Thanks, I'll make the changes, test them, and post a new patch. @@ -13288,7 +13295,8 @@ grok_special_member_properties (tree decl) { tree class_type; - if (!DECL_NONSTATIC_MEMBER_FUNCTION_P (decl)) + if (TREE_CODE (decl) != USING_DECL + && !DECL_NONSTATIC_MEMBER_FUNCTION_P (decl)) return; Is there a reason not to use it here, as well? The using decl will take us to a member of a different class, and this function takes the DECL_CONTEXT of decl and adjusts the properties of that class. If we followed USING_DECLs in decl that early, we'd adjust (again?) the member properties of USING_DECL_SCOPE(original using decl), rather than of DECL_CONTEXT (original using decl) as intended. But a little further in, copy_fn_p will also check DECL_NONSTATIC_MEMBER_FUNCTION_P and crash. It would crash if I hadn't adjusted it to test for USING_DECLs, yes. This function used to do nothing for USING_DECL (since its TREE_TYPE wasn't METHOD_TYPE), right? It should continue to do nothing. I thought I was fixing bugs making certain functions follow USING_DECLs rather than fail to do what it should, because the test on TREE_TYPE happened to not pass. This one I was not so sure about, for the reasons above. I guess it makes sense to return early if it really shouldn't do anything for a USING_DECL, even if it happens to resolve to a method that it would otherwise handle if declared directly in the class. Come to think of it, how about fixing DECL_NONSTATIC_MEMBER_FUNCTION_P to be false for non-functions rather than messing with all these places that use it? I considered that at first, but it seemed to me that this would not bring about the desired behavior in the first functions I touched (lookup_member vs shared_member), and I thought we'd be better off retaining some means to catch incorrect uses of this and other macros on USING_DECLs, so that we can analyze and fix the uses instead of getting accidental behavior, correct or not. OK, that makes sense; it isn't always clear what the right handling of a USING_DECL is. In grok_special_member_properties we want to ignore them (as you do) because the handling there only applies to immediate members, and any inherited properties will be handled when processing the bases. In protected_accessible_p and shared_member_p, if we're left with a USING_DECL after strip_using_decl, we can't give a meaningful answer, and should probably abort; we shouldn't get here with a dependent expression. The other two changes look fine. Jason
Xfail gfortran.dg/pr79966.f90
Hi, I have added the testcase for PR79966 to test that tp_sum is inlined becuase it matters for performance of that benchmark. Sadly it is not inlined anymore because it was inlined due to accounting bug I fixed in January. I do not see how to make inliner to do the inlining here except for setting --param inline-min-speedup=2 which is bit extreme (though i tested estting it to 5 and it does not add too much of code size). Inliner does not see why inlining the function is profitable and previously it did so because of missing multiplication scaling statement time by iteration count. tp_sum has loop iterating 16 times which was accounted as loop iterating once making whole funtion chepaer and making inlining seem more profitable. OK? Honza PR ipa/88711 * gfortran.dg/pr79966.f90: Xfail for time being. Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90 === --- ../../gcc/testsuite/gfortran.dg/pr79966.f90 (revision 268579) +++ ../../gcc/testsuite/gfortran.dg/pr79966.f90 (working copy) @@ -108,5 +108,5 @@ contains call RunTPTests() end program - -! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into runtptests/\[0-9\]+" "inline" } } +! See PR88711. Inliner is currently not able to figure out that inlining tp_sum is a good idea. +! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } }
Re: Xfail gfortran.dg/pr79966.f90
Hi Honza, > PR ipa/88711 > * gfortran.dg/pr79966.f90: Xfail for time being. > Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90 > === > --- ../../gcc/testsuite/gfortran.dg/pr79966.f90 (revision 268579) > +++ ../../gcc/testsuite/gfortran.dg/pr79966.f90 (working copy) > @@ -108,5 +108,5 @@ contains > >call RunTPTests() >end program > - > -! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into > runtptests/\[0-9\]+" "inline" } } > +! See PR88711. Inliner is currently not able to figure out that inlining > tp_sum is a good idea. > +! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into > runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } } why restrict the xfail to spu only? According to gcc-testresults, the test currently fails everywhere (aarch64, arm*, hppa*, i?86, ia64, m68k, powerpc*, riscv64, sparc*, x86_64). Rainer -- - Rainer Orth, Center for Biotechnology, Bielefeld University
Re: [C++PATCH] [PR87322] move cp_evaluated up to tsubst all lambda parms
On 2/7/19 2:24 AM, Alexandre Oliva wrote: From: Alexandre Oliva A lambda capture variable initialized with a lambda expr taking more than one parameter got us confused. The first problem was that the parameter list was cut short during tsubsting because we tsubsted it with cp_unevaluated_operand. We reset it right after, to tsubst the function body, so I've moved the reset up so that it's in effect while processing the parameters as well. The second problem was that the lambda expr appeared twice, once in a decltype that gave the capture variable its type, and once in its initializer. This caused us to instantiate two separate lambda exprs and closure types, and then to flag that the lambda expr in the initializer could not be converted to the unrelated closure type determined for the capture variable. Recording the tsubsted expr in the local specialization map, and retrieving it for reuse fixed it. Regstrapped on x86_64- and i686-linux-gnu. Ok to install? for gcc/cp/ChangeLog PR c++/87322 * pt.c (tsubst_lambda_expr): Avoid duplicate tsubsting. Move cp_evaluated resetting before signature tsubsting. for gcc/testsuite/ChangeLog PR c++/87322 * g++.dg/cpp1y/pr87322.C: New. --- gcc/cp/pt.c | 18 ++ gcc/testsuite/g++.dg/cpp1y/pr87322.C | 23 +++ 2 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr87322.C diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index b8fbf4046f07..3dffa8d2de88 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -17912,8 +17912,17 @@ tsubst_lambda_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl) tree oldfn = lambda_function (t); in_decl = oldfn; + /* If we have already specialized this lambda expr, reuse it. See + PR c++/86322. */ Wrong PR number. + if (local_specializations) +if (tree r = retrieve_local_specialization (t)) + return r; Hmm, I would expect this to do the wrong thing for pack expansion of a lambda, giving us only one rather than one per element of the expansion. For instance, in lambda-variadic5.C we should get three instantiations of the "print" function template; can you add that check to the test? The cp_evaluated change is OK. Jason
Re: [PATCH] i386: Fix typo in *movoi_internal_avx/movti_internal
On Thu, Feb 7, 2019 at 5:44 AM H.J. Lu wrote: > > PR target/89229 > * config/i386/i386.md (*movoi_internal_avx): Set mode to OI > for TARGET_AVX512VL. > (*movti_internal): Set mode to TI for TARGET_AVX512VL. OK. Thanks, Uros. > --- > gcc/config/i386/i386.md | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 9948f77fca5..c1492363bca 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1938,7 +1938,7 @@ > (const_string "XI") >(and (eq_attr "alternative" "1") > (match_test "TARGET_AVX512VL")) > -(const_string "XI") > +(const_string "OI") >(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > (and (eq_attr "alternative" "3") > (match_test "TARGET_SSE_TYPELESS_STORES"))) > @@ -2017,7 +2017,7 @@ > (const_string "XI") >(and (eq_attr "alternative" "3") > (match_test "TARGET_AVX512VL")) > -(const_string "XI") > +(const_string "TI") >(ior (not (match_test "TARGET_SSE2")) > (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") > (and (eq_attr "alternative" "5") > -- > 2.20.1 >
[PING] [PATCH, RFC] Avoid the -D option which is not available install-sh
I'd like to ping for this patch: https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01779.html Thanks Bernd. On 1/31/19 12:38 PM, Bernd Edlinger wrote: > Hi, > > I have an issue with the installation of gcc when configured with > --enable-languages=all > on an arm-target where install-sh is used, and make install fails at > libphobos as follows: > > if test -f $file; then \ > /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D $file > /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file > ; \ > else \ > /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D > ../../../../gcc-9-20190127-0/libphobos/libdruntime/$file \ > > /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file > ; \ > fi ; \ > done > /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D > /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D > /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D > ... > > I have fixed the installation with the attached patch, but when I regenerate > the automake > files using automake-1.15.1 and autoconf-2.69, I have an issue that apparently > the configure.ac must be out of sync, and the the generated files are missing > the option --runstatedir no matter what I do. At least on the source > > RFC, because I am not sure what the --runstatedir option is, and if it is > intentional to remove, > and forgotten to re-generate, or if was intended to add, and forgotten to > check in the > configure.ac. > > Attached patch which fixes the install issue, and removes the --runstatedir > configure option. > Is OK as is, or has anybody an idea how to fix this mess? > > > Thanks > Bernd. >
[committed] [obvious][testsuite] Only run rtl/arm/ldrd-peepholes.c test on arm architecture
My previous patch failed to only run an arm test on arm architecture. This adds that condition to the test. Committed to trunk as obvious. gcc/testsuite/ChangeLog: 2019-02-07 Matthew Malcomson * gcc.dg/rtl/arm/ldrd-peepholes.c: Only run on arm ### Attachment also inlined for ease of reply### diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c index ff209c5df29765441bbe9481ac8caf7bbc6af8f7..4c3949c0963b8482545df670c31db2d9ec0f26b3 100644 --- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c +++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target arm*-*-* } } */ /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */ /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */ diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c index ff209c5df29765441bbe9481ac8caf7bbc6af8f7..4c3949c0963b8482545df670c31db2d9ec0f26b3 100644 --- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c +++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target arm*-*-* } } */ /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */ /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
[PATCH] i386: Add standard scalar operation patterns
Standard scalar operation patterns which preserve the rest of the vector look like (vec_merge:V2DF (vec_duplicate:V2DF (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) (parallel [ (const_int 0 [0])])) (reg:DF 87)) (reg/v:V2DF 85 [ x ]) (const_int 1 [0x1])])) Add such pattens to i386 backend and convert VEC_CONCAT patterns to standard standard scalar operation patterns. gcc/ PR target/54855 * simplify-rtx.c (simplify_binary_operation_1): Convert VEC_CONCAT patterns to standard standard scalar operation patterns. * config/i386/sse.md (*_vm3): New. (*_vm3): Likewise. gcc/testsuite/ PR target/54855 * gcc.target/i386/pr54855-1.c: New test. * gcc.target/i386/pr54855-2.c: Likewise. * gcc.target/i386/pr54855-3.c: Likewise. * gcc.target/i386/pr54855-4.c: Likewise. * gcc.target/i386/pr54855-5.c: Likewise. * gcc.target/i386/pr54855-6.c: Likewise. * gcc.target/i386/pr54855-7.c: Likewise. --- gcc/config/i386/sse.md| 45 + gcc/simplify-rtx.c| 49 +++ gcc/testsuite/gcc.target/i386/pr54855-1.c | 16 gcc/testsuite/gcc.target/i386/pr54855-2.c | 15 +++ gcc/testsuite/gcc.target/i386/pr54855-3.c | 14 +++ gcc/testsuite/gcc.target/i386/pr54855-4.c | 14 +++ gcc/testsuite/gcc.target/i386/pr54855-5.c | 16 gcc/testsuite/gcc.target/i386/pr54855-6.c | 14 +++ gcc/testsuite/gcc.target/i386/pr54855-7.c | 14 +++ 9 files changed, 197 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-4.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-6.c create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-7.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5dc0930ac1f..03b6f3369fc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1719,6 +1719,28 @@ (set_attr "type" "sseadd") (set_attr "mode" "")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*_vm3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (plusminus: + (vec_select: + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand: 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + (define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 @@ -1773,6 +1795,29 @@ (set_attr "type" "ssemul") (set_attr "mode" "")]) +;; Standard scalar operation patterns which preserve the rest of the +;; vector for combiner. +(define_insn "*_vm3" + [(set (match_operand:VF_128 0 "register_operand" "=x,v") + (vec_merge:VF_128 + (vec_duplicate:VF_128 + (multdiv: + (vec_select: + (match_operand:VF_128 1 "register_operand" "0,v") + (parallel [(const_int 0)])) + (match_operand: 2 "nonimmediate_operand" "xm,vm"))) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "@ + \t{%2, %0|%0, %2} + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "prefix" "orig,vex") + (set_attr "btver2_decode" "direct,double") + (set_attr "mode" "")]) + (define_insn "_vm3" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 83580a259f3..c32544381d0 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -4023,6 +4023,55 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0), gen_rtx_PARALLEL (VOIDmode, vec)); } + + /* Turn + + (vec_concat:V2DF +(op:DF (vec_select:DF (reg/v:V2DF 85 [ x ]) + (parallel [ (const_int 0 [0])])) + (reg:DF 87)) +(vec_select:DF (reg/v:V2DF 85 [ x ]) + (parallel [ (const_int 1 [0x1])]))) + + into standard scalar operation patterns which preserve the + rest of the vector: + + (vec_merge:V2DF +(vec_duplicate:V2DF + (op:DF (vec
Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64
Hi Steve, >> After special cases you could do something like t = mask2 + (HWI_1U << >> shift); >> return t == (t & -t) to check for a valid bfi. > > I am not sure I follow this logic and my attempts to use this did not > work so I kept my original code. It's similar to the initial code in aarch64_bitmask_imm, but rather than adding the lowest bit to the value to verify it is a mask (val + (val & -val)), we use the shift instead. If the shift is exactly right, it reaches the first set bit of the mask. Adding the low bit to a valid mask always results in zero or a single set bit. The standard idiom to check that is t == (t & -t). >> + "bfi\t%0, %1, 0, %P2" >> >> This could emit a width that may be 32 too large in SImode if bit 31 is set >> (there is another use of %P in aarch64.md which may have the same >> issue). > > I am not sure why having bit 31 set would be a problem. Sign > extension? Yes, if bit 31 is set, %P will emit 33 for a 32-bit constant which is obviously wrong. Your patch avoids this for bfi by explicitly computing the correct value. This looks good to me (and creates useful bfi's as expected), but I can't approve. Wilco
[New test fortran, committed] PR 52789 gfortran sets -Wunused-parameter in the C sense as well as the Fortran sense
New test committed as obvious at revision r268656. Dominique
Re: [PATCH] print correct array sizes in errors (PR 87996)
On 2/7/19 9:10 AM, Jason Merrill wrote: On 2/5/19 4:55 PM, Martin Sebor wrote: On 2/5/19 12:14 PM, Jason Merrill wrote: On 2/5/19 1:46 PM, Martin Sebor wrote: On 2/1/19 7:41 AM, Jason Merrill wrote: On 1/31/19 5:49 PM, Martin Sebor wrote: On 1/30/19 3:15 PM, Jason Merrill wrote: On 1/29/19 7:15 PM, Martin Sebor wrote: + /* Try to convert the original SIZE to a ssizetype. */ + if (orig_size != error_mark_node + && !TYPE_UNSIGNED (TREE_TYPE (orig_size))) + { + if (TREE_CODE (size) == INTEGER_CST + && tree_int_cst_sign_bit (size)) + diagsize = build_converted_constant_expr (ssizetype, size, + tsubst_flags_t ()); + else if (size == error_mark_node + && TREE_CODE (orig_size) == INTEGER_CST + && tree_int_cst_sign_bit (orig_size)) + diagsize = build_converted_constant_expr (ssizetype, orig_size, + tsubst_flags_t ()); + } Using build_converted_constant_expr here looks odd; that's a language-level notion, and we're dealing with compiler internals. fold_convert seems more appropriate. Done. + if (TREE_CONSTANT (size)) + { + if (!diagsize && TREE_CODE (size) == INTEGER_CST) + diagsize = size; + } + else size = osize; } @@ -9732,15 +9758,12 @@ compute_array_index_type_loc (location_t name_loc, if (TREE_CODE (size) == INTEGER_CST) { /* An array must have a positive number of elements. */ - if (!valid_constant_size_p (size)) + if (!diagsize) + diagsize = size; It seems like the earlier hunk here is unnecessary; if size is an INTEGER_CST, it will be unchanged, and so be used for diagsize in the latter hunk without any changes to the earlier location. Actually, why not do all of the diagsize logic down here? There doesn't seem to be anything above that relies on information we will have lost at this point. Sure. Done in the attached revision. - tree osize = size; + /* The original numeric size as seen in the source code after + any substitution and before conversion to size_t. */ + tree origsize = NULL_TREE; Can't you use osize? instantiate_non_dependent_expr doesn't do any actual substitution, it shouldn't change the type of the expression or affect whether it's an INTEGER_CST. I went ahead and reused osize but kept the new name origsize (I assume avoiding introducing a new variable is what you meant). The longer name is more descriptive and also has a comment explaining what it's for (which is why I didn't touch osize initially -- I didn't know enough about what it was for or how it might change). Yep, it was saving the original "size" before conversions. I guess the name change is OK, but please restore the initialization from "size", and drop the added comment on the call to mark_rvalue_use (which is to possibly replace a lambda capture with its constant value). How about passing origsize into valid_array_size_p rather than than always computing diagsize in the caller? I'm not sure I understand what you're asking for here. diagsize is computed from either size or origsize but valid_array_size_p takes just one size (or type) argument. What part do you want to move to valid_array_size_p (and why)? Or are you asking to call fold_convert() in valid_array_size_p instead here? I was thinking of adding an origsize parameter to valid_array_size_p. The function has 6 callers and only this one would pass it this extra argument(*). The others pass it the array type, not size (something I did to avoid adding an argument to it), so they would pass it null as this new argument. Adding an argument to the function that's unrelated to its purpose only to simplify one caller doesn't feel right to me. So if I'm not missing something and this was just a suggestion to consider, I would prefer to keep the computation where it is. I have simplified it a bit and eliminated some unnecessary logic. Otherwise, if it's a precondition of approving the patch, please let me know. Attached is the updated patch with the removed comment and with the slightly simpler logic in compute_array_index_type_loc. Martin [*] We only really need one bit: the signedness of the original type. With the simplification its value is no longer used. PR c++/87996 - size of array is negative error when SIZE_MAX/2 < sizeof(array) <= SIZE_MAX gcc/ChangeLog: PR c++/87996 * builtins.c (max_object_size): Move from here... * builtins.h (max_object_size): ...and here... * tree.c (max_object_size): ...to here... * tree.h (max_object_size): ...and here. gcc/c-family/ChangeLog: PR c++/87996 * c-common.c (invalid_array_size_error): New function. (valid_array_size_p): Call it. Handle size as well as type. * c-common.h (valid_constant_size_p): New function. (enum cst_size_error): New type. gcc/cp/ChangeLog: PR c++/87996 * decl.c (compute_array_inde
[PATCH] Fix more ICEs in -fsave-optimization-record (PR tree-optimization/89235)
PR tree-optimization/89235 reports an ICE inside -fsave-optimization-record whilst reporting the inlining chain of of the location_t in the vect_location global. This is very similar to PR tree-optimization/86637, fixed in r266821. The issue is that the inlining chains are read from the location_t's ad-hoc data, referencing GC-managed tree blocks, but the former are not GC roots; it's simply assumed that old locations referencing dead blocks never get used again. The fix is to reset the "vect_location" global in more places. Given that is a somewhat subtle detail, the patch adds a sentinel class to reset vect_location at the end of a scope. Doing it as a class simplifies the task of ensuring that the global is reset on every exit path from a function, and also gives a good place to signpost the above subtlety (in the documentation for the class). The patch also adds test cases for both of the PRs mentioned above. Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. OK for trunk? gcc/testsuite/ChangeLog: PR tree-optimization/86637 PR tree-optimization/89235 * gcc.c-torture/compile/pr86637-1.c: New test. * gcc.c-torture/compile/pr86637-2.c: New test. * gcc.c-torture/compile/pr86637-3.c: New test. * gcc.c-torture/compile/pr89235.c: New test. gcc/ChangeLog: PR tree-optimization/86637 PR tree-optimization/89235 * tree-vect-loop.c (optimize_mask_stores): Add an auto_purge_vect_location sentinel to ensure that vect_location is purged on exit. * tree-vectorizer.c (auto_purge_vect_location::~auto_purge_vect_location): New dtor. (try_vectorize_loop_1): Add an auto_purge_vect_location sentinel to ensure that vect_location is purged on exit. (pass_slp_vectorize::execute): Likewise, replacing the manual reset. * tree-vectorizer.h (class auto_purge_vect_location): New class. --- gcc/testsuite/gcc.c-torture/compile/pr86637-1.c | 13 +++ gcc/testsuite/gcc.c-torture/compile/pr86637-2.c | 128 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c | 11 ++ gcc/testsuite/gcc.c-torture/compile/pr89235.c | 57 +++ gcc/tree-vect-loop.c| 1 + gcc/tree-vectorizer.c | 13 ++- gcc/tree-vectorizer.h | 18 7 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-1.c create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-2.c create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr89235.c diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c new file mode 100644 index 000..61b6381 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c @@ -0,0 +1,13 @@ +/* { dg-options "-fsave-optimization-record -ftree-slp-vectorize --param ggc-min-expand=1 --param ggc-min-heapsize=1024" } */ + +void +en (void) +{ +} + +void +n4 (int zb) +{ + while (zb < 1) +++zb; +} diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c new file mode 100644 index 000..3b675ea --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c @@ -0,0 +1,128 @@ +/* { dg-do compile { target fgraphite } } */ +/* { dg-options "-floop-parallelize-all -fsave-optimization-record -ftree-parallelize-loops=2 -ftree-slp-vectorize" } */ + +#include +#include + +signed char qq; +int rm, mu, l9; +long long unsigned int ip; + +void +fi (void) +{ +} + +void +il (long long unsigned int c5) +{ + int *na = μ + + while (l9 < 1) +{ + if (qq < 1) +{ + short int ds = 0; + + if ((ip - *na - ip / c5 != 0) && (*na / ((c5 + c5) && !!ip) != 0)) +__builtin_trap (); + + rm = -1 / (!!rm && !!c5); + + while (qq < 1) +{ + ++*na; + ++ip; + if (*na < (int) ip) +while (ds < 2) + { +++qq; +++ds; + } +} +} + + ++l9; +} + + for (;;) +{ +} +} + +void +uu (short int wk) +{ + int64_t tl = ip; + + while (ip < 2) +{ + signed char vn; + + for (vn = 1; vn != 0; ++vn) +{ + int rh; + + if (qq < 1) +{ + while (mu < 1) +ip = 0; + + while (rm != 0) +++rm; +} + + for (rh = 0; rh < 3; ++rh) +{ + int ab; + + for (ab = 0; ab < 5; ++ab) +{ + tl -= qq; + wk += rh - tl; + ip += (ab < wk) + wk; +} +} +} +} +} + +void +im (uint8_t kt) +{ + int wu = 0; + +
Re: [PATCH] Fix PR89150, GC of tree-form bitmaps
On 2019-02-04 11:39 a.m., Richard Biener wrote: > On February 4, 2019 5:07:00 PM GMT+01:00, Jeff Law wrote: >> On 2/4/19 6:15 AM, Richard Biener wrote: >>> >>> When I introduced tree-form bitmaps I forgot to think about GC. >>> The following drops the chain_prev annotation to make the marker >>> work for trees. I've also maked the obstack member GTY skip >>> (and prevent bitmap_obstack from gengtype processing) because >>> the obstack isn't used for GC allocated bitmaps. >>> >>> Bootstrap & regtest running on x86_64-unknown-linux-gnu. >>> >>> Richard. >>> >>> 2019-02-04 Richard Biener >>> >>> PR middle-end/89150 >>> * bitmap.h (struct bitmap_obstack): Do not mark GTY. >>> (struct bitmap_element): Drop chain_prev so we properly recurse on >>> the prev member, supporting tree views. >>> (struct bitmap_head): GTY skip the obstack member. >> Was there a particular failure mode you observed or was this discovered >> by inspection. > > It was discovered via an out of tree patch, the issue is only latent on trunk > (together with another unfixed pch issue of bitmaps). My apologies for not noticing this thread/PR earlier. I originally discovered this issue while working on an experimental patch, but since then I've also come up with a test case for this specific issue. Is it worth adding it to trunk? - Michael From 9f1049a1e24cd1aa9852c51342dce0226416 Mon Sep 17 00:00:00 2001 From: Michael Ploujnikov Date: Thu, 7 Feb 2019 14:43:40 -0500 Subject: [PATCH] Add a test for GC marking bitmaps in tree view mode gcc/ChangeLog: 2019-02-07 Michael Ploujnikov PR middle-end/89150 * bitmap.c (test_bitmap_tree_marking): New test. (NOT_NULL_OR_GARBAGE): For shortening test_bitmap_tree_marking. (bitmap_c_tests): Add test_bitmap_tree_marking. --- gcc/bitmap.c | 82 1 file changed, 82 insertions(+) diff --git gcc/bitmap.c gcc/bitmap.c index 5a8236de75..7e802a1600 100644 --- gcc/bitmap.c +++ gcc/bitmap.c @@ -2626,6 +2626,11 @@ bitmap_head::dump () #if CHECKING_P +static GTY(()) bitmap selftest_test_bitmap; + +/* From ggc-internal.h */ +extern bool ggc_force_collect; + namespace selftest { /* Selftests for bitmaps. */ @@ -2722,6 +2727,82 @@ test_bitmap_single_bit_set_p () ASSERT_EQ (1066, bitmap_first_set_bit (b)); } +#define NOT_NULL_OR_GARBAGE(NODE) \ + (NODE != NULL && (unsigned long)(NODE) != 0xa5a5a5a5UL) + +static void +test_bitmap_tree_marking () +{ + selftest_test_bitmap = BITMAP_GGC_ALLOC (); + bitmap_tree_view (selftest_test_bitmap); + + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 1*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 5*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 10*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 15*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 30*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 25*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 3*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 4*BITMAP_ELEMENT_ALL_BITS)); + ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 26*BITMAP_ELEMENT_ALL_BITS)); + /* At this point it should look like: + 26 + / \ +25 30 + / + 5 + / \ +4 15 + / / + 3 10 + / +1 + + */ + ggc_force_collect = true; + ggc_collect (); + ggc_force_collect = false; + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first)); + ASSERT_TRUE (selftest_test_bitmap->first->indx == 26); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->next)); + ASSERT_TRUE (selftest_test_bitmap->first->next->indx == 30); + ASSERT_TRUE (selftest_test_bitmap->first->next->next == NULL); + ASSERT_TRUE (selftest_test_bitmap->first->next->prev == NULL); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev)); + ASSERT_TRUE (selftest_test_bitmap->first->prev->indx == 25); + ASSERT_TRUE (selftest_test_bitmap->first->prev->next == NULL); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev)); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->indx == 5); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev->next)); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->indx == 15); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->next == NULL); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev->next->prev)); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->indx == 10); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->prev == NULL); + ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->next == NULL); + + ASSERT_TRUE (NOT_NULL_OR_GARBAGE
Re: [PATCH] Fix PR89150, GC of tree-form bitmaps
On Thu, Feb 07, 2019 at 03:04:21PM -0500, Michael Ploujnikov wrote: > 2019-02-07 Michael Ploujnikov > > PR middle-end/89150 > * bitmap.c (test_bitmap_tree_marking): New test. > (NOT_NULL_OR_GARBAGE): For shortening > test_bitmap_tree_marking. > (bitmap_c_tests): Add test_bitmap_tree_marking. Could you do that instead in a plugin in the testsuite? I mean, the patch is adding garbage collection roots, so it will not affect just -fself-tests run, but also any time the compiler will do GC. Jakub
Re: [PATCH] Fix more ICEs in -fsave-optimization-record (PR tree-optimization/89235)
On February 7, 2019 9:10:15 PM GMT+01:00, David Malcolm wrote: >PR tree-optimization/89235 reports an ICE inside >-fsave-optimization-record >whilst reporting the inlining chain of of the location_t in the >vect_location global. > >This is very similar to PR tree-optimization/86637, fixed in r266821. > >The issue is that the inlining chains are read from the location_t's >ad-hoc data, referencing GC-managed tree blocks, but the former are >not GC roots; it's simply assumed that old locations referencing dead >blocks never get used again. > >The fix is to reset the "vect_location" global in more places. Given >that is a somewhat subtle detail, the patch adds a sentinel class to >reset vect_location at the end of a scope. Doing it as a class >simplifies the task of ensuring that the global is reset on every >exit path from a function, and also gives a good place to signpost >the above subtlety (in the documentation for the class). > >The patch also adds test cases for both of the PRs mentioned above. > >Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. > >OK for trunk? OK. Richard. >gcc/testsuite/ChangeLog: > PR tree-optimization/86637 > PR tree-optimization/89235 > * gcc.c-torture/compile/pr86637-1.c: New test. > * gcc.c-torture/compile/pr86637-2.c: New test. > * gcc.c-torture/compile/pr86637-3.c: New test. > * gcc.c-torture/compile/pr89235.c: New test. > >gcc/ChangeLog: > PR tree-optimization/86637 > PR tree-optimization/89235 > * tree-vect-loop.c (optimize_mask_stores): Add an > auto_purge_vect_location sentinel to ensure that vect_location is > purged on exit. > * tree-vectorizer.c > (auto_purge_vect_location::~auto_purge_vect_location): New dtor. > (try_vectorize_loop_1): Add an auto_purge_vect_location sentinel > to ensure that vect_location is purged on exit. > (pass_slp_vectorize::execute): Likewise, replacing the manual > reset. > * tree-vectorizer.h (class auto_purge_vect_location): New class. >--- > gcc/testsuite/gcc.c-torture/compile/pr86637-1.c | 13 +++ >gcc/testsuite/gcc.c-torture/compile/pr86637-2.c | 128 > > gcc/testsuite/gcc.c-torture/compile/pr86637-3.c | 11 ++ > gcc/testsuite/gcc.c-torture/compile/pr89235.c | 57 +++ > gcc/tree-vect-loop.c| 1 + > gcc/tree-vectorizer.c | 13 ++- > gcc/tree-vectorizer.h | 18 > 7 files changed, 239 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-1.c > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-2.c > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr89235.c > >diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c >b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c >new file mode 100644 >index 000..61b6381 >--- /dev/null >+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c >@@ -0,0 +1,13 @@ >+/* { dg-options "-fsave-optimization-record -ftree-slp-vectorize >--param ggc-min-expand=1 --param ggc-min-heapsize=1024" } */ >+ >+void >+en (void) >+{ >+} >+ >+void >+n4 (int zb) >+{ >+ while (zb < 1) >+++zb; >+} >diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c >b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c >new file mode 100644 >index 000..3b675ea >--- /dev/null >+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c >@@ -0,0 +1,128 @@ >+/* { dg-do compile { target fgraphite } } */ >+/* { dg-options "-floop-parallelize-all -fsave-optimization-record >-ftree-parallelize-loops=2 -ftree-slp-vectorize" } */ >+ >+#include >+#include >+ >+signed char qq; >+int rm, mu, l9; >+long long unsigned int ip; >+ >+void >+fi (void) >+{ >+} >+ >+void >+il (long long unsigned int c5) >+{ >+ int *na = μ >+ >+ while (l9 < 1) >+{ >+ if (qq < 1) >+{ >+ short int ds = 0; >+ >+ if ((ip - *na - ip / c5 != 0) && (*na / ((c5 + c5) && !!ip) >!= 0)) >+__builtin_trap (); >+ >+ rm = -1 / (!!rm && !!c5); >+ >+ while (qq < 1) >+{ >+ ++*na; >+ ++ip; >+ if (*na < (int) ip) >+while (ds < 2) >+ { >+++qq; >+++ds; >+ } >+} >+} >+ >+ ++l9; >+} >+ >+ for (;;) >+{ >+} >+} >+ >+void >+uu (short int wk) >+{ >+ int64_t tl = ip; >+ >+ while (ip < 2) >+{ >+ signed char vn; >+ >+ for (vn = 1; vn != 0; ++vn) >+{ >+ int rh; >+ >+ if (qq < 1) >+{ >+ while (mu < 1) >+ip = 0; >+ >+ while (rm != 0) >+++rm; >+} >+ >+ for (rh = 0; rh < 3; ++rh) >+{ >+ int ab; >+ >+ for (ab = 0; ab < 5; ++
[PATCH] i386: Use OI/TImode in *mov[ot]i_internal_avx with AVX512VL
OImode and TImode moves must be done in XImode to access upper 16 vector registers without AVX512VL. With AVX512VL, we can access upper 16 vector registers in OImode and TImode. PR target/89229 * config/i386/i386.md (*movoi_internal_avx): Set mode to XI for upper 16 vector registers without TARGET_AVX512VL. (*movti_internal): Likewise. --- gcc/config/i386/i386.md | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c1492363bca..e7f4b9a9c8d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1933,8 +1933,9 @@ (set_attr "type" "sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "vex") (set (attr "mode") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) + (cond [(and (ior (match_operand 0 "ext_sse_reg_operand") +(match_operand 1 "ext_sse_reg_operand")) + (match_test "!TARGET_AVX512VL")) (const_string "XI") (and (eq_attr "alternative" "1") (match_test "TARGET_AVX512VL")) @@ -2012,8 +2013,9 @@ (set (attr "mode") (cond [(eq_attr "alternative" "0,1") (const_string "DI") - (ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) + (and (ior (match_operand 0 "ext_sse_reg_operand") +(match_operand 1 "ext_sse_reg_operand")) + (match_test "!TARGET_AVX512VL")) (const_string "XI") (and (eq_attr "alternative" "3") (match_test "TARGET_AVX512VL")) -- 2.20.1
[PATCH] Fix ICE due to copy_reg_eh_region_note_forward (PR rtl-optimization/89234)
Hi! The following testcase ICEs on ppc64le. The problem is that copy_reg_eh_region_note_* functions accept either some instruction, or REG_EH_REGION note directly. To differentiate between those it uses INSN_P test (and returns early if the insn doesn't contain any REG_EH_REGION notes). If the function is called on a rtx_insn * that isn't INSN_P, like on the testcase on a NOTE, it assumes it must be REG_EH_REGION note, uses XEXP (note, 0) on it, which is actually PREV_INSN in this case and stores an instruction (JUMP_INSN in this case) into REG_EH_REGION notes it creates. I believe we should treat rtx_insn * that aren't INSN_P like instructions that don't have any REG_EH_REGION notes. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-07 Jakub Jelinek PR rtl-optimization/89234 * except.c (copy_reg_eh_region_note_forward): Return if note_or_insn is a NOTE, CODE_LABEL etc. - rtx_insn * other than INSN_P. (copy_reg_eh_region_note_backward): Likewise. * g++.dg/ubsan/pr89234.C: New test. --- gcc/except.c.jj 2019-01-10 11:43:14.387377695 +0100 +++ gcc/except.c2019-02-07 15:11:27.756869475 +0100 @@ -1756,6 +1756,8 @@ copy_reg_eh_region_note_forward (rtx not if (note == NULL) return; } + else if (is_a (note_or_insn)) +return; note = XEXP (note, 0); for (insn = first; insn != last ; insn = NEXT_INSN (insn)) @@ -1778,6 +1780,8 @@ copy_reg_eh_region_note_backward (rtx no if (note == NULL) return; } + else if (is_a (note_or_insn)) +return; note = XEXP (note, 0); for (insn = last; insn != first; insn = PREV_INSN (insn)) --- gcc/testsuite/g++.dg/ubsan/pr89234.C.jj 2019-02-07 17:12:02.081024666 +0100 +++ gcc/testsuite/g++.dg/ubsan/pr89234.C2019-02-07 17:08:55.026097949 +0100 @@ -0,0 +1,11 @@ +// PR rtl-optimization/89234 +// { dg-do compile { target dfp } } +// { dg-options "-O2 -fnon-call-exceptions -fsanitize=null" } + +typedef float __attribute__((mode (SD))) _Decimal32; + +void +foo (_Decimal32 *b, _Decimal32 c) +{ + *b = c + 1.5; +} Jakub
[PATCH] Narrow ARRAY*_REF indexes to sizetype for gimple (PR tree-optimization/89223)
Hi! As mentioned in the PR, given that during expansion we expand ARRAY_REFs using get_inner_reference that casts ARRAY_*REF indexes to sizetype, all the bits above sizetype are ignored (whether one uses long long indexes on 32-bit targets or __int128 indexes on 64-bit ones), so I think it is desirable to make that visible already for GIMPLE optimizations, where we can narrow other arithmetic stmts feeding those indexes etc., it could help vectorization (e.g. gather/scatter), etc. Of course, fixing tree-data-ref.c etc. to cope with wider constants or chrecs is desirable as well. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-06 Jakub Jelinek PR tree-optimization/89223 * gimplify.c (gimplify_compound_lval): Narrow ARRAY*_REF indexes wider than sizetype to sizetype. * gcc.c-torture/execute/pr89223.c: New test. --- gcc/gimplify.c.jj 2019-01-28 23:30:53.199762928 +0100 +++ gcc/gimplify.c 2019-02-06 17:15:35.368976785 +0100 @@ -2977,6 +2977,12 @@ gimplify_compound_lval (tree *expr_p, gi if (TREE_CODE (t) == ARRAY_REF || TREE_CODE (t) == ARRAY_RANGE_REF) { + /* Expansion will cast the index to sizetype, so any excess bits +aren't useful for optimizations. */ + if (!error_operand_p (TREE_OPERAND (t, 1)) + && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (t, 1))) + > TYPE_PRECISION (sizetype))) + TREE_OPERAND (t, 1) = fold_convert (sizetype, TREE_OPERAND (t, 1)); /* Gimplify the dimension. */ if (!is_gimple_min_invariant (TREE_OPERAND (t, 1))) { --- gcc/testsuite/gcc.c-torture/execute/pr89223.c.jj2019-02-07 18:09:39.869088769 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr89223.c 2019-02-07 18:09:12.359543231 +0100 @@ -0,0 +1,27 @@ +/* PR tree-optimization/89223 */ +/* { dg-do run { target int128 } } */ + +int a[9] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; +unsigned __int128 b; + +__attribute__((noipa)) void +foo (void) +{ + for (b = (((unsigned __int128) 4) << 64) + 4; b != 0; b -= unsigned __int128) 1) << 64) + 1)) +a[b] = a[b + b]; +} + +int +main () +{ + int i; + if (sizeof (__UINTPTR_TYPE__) * __CHAR_BIT__ != 64 + || sizeof (__SIZE_TYPE__) * __CHAR_BIT__ != 64 + || sizeof (__int128) * __CHAR_BIT__ != 128) +return 0; + foo (); + for (i = 0; i < 9; ++i) +if (a[i] != (((1 << i) & 0x16) != 0 ? 9 : i == 3 ? 7 : i + 1)) + __builtin_abort (); + return 0; +} Jakub
[PATCH] Fix up pre_and_rev_post_order_compute_fn
Hi! I've noticed pre_and_rev_post_order_compute_fn is one of (apparently many) functions that take a struct function * argument and accept any function, as long as it is cfun. For a patch I've been working on I actually need it to handle other functions as well and in this case it is trivial to fix. I've seen several others e.g. in graph.c, though some of those are harder to deal with, as some there are calls to functions that don't have a variant with struct function * argument and assume cfun implicitly. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-07 Jakub Jelinek * cfganal.c (pre_and_rev_post_order_compute_fn): Use fn instead of cfun everywhere. --- gcc/cfganal.c.jj2019-01-01 12:37:17.915962514 +0100 +++ gcc/cfganal.c 2019-02-07 20:47:29.035520143 +0100 @@ -951,10 +951,10 @@ pre_and_rev_post_order_compute_fn (struc bool include_entry_exit) { int pre_order_num = 0; - int rev_post_order_num = n_basic_blocks_for_fn (cfun) - 1; + int rev_post_order_num = n_basic_blocks_for_fn (fn) - 1; /* Allocate stack for back-tracking up CFG. */ - auto_vec stack (n_basic_blocks_for_fn (cfun) + 1); + auto_vec stack (n_basic_blocks_for_fn (fn) + 1); if (include_entry_exit) { @@ -968,7 +968,7 @@ pre_and_rev_post_order_compute_fn (struc rev_post_order_num -= NUM_FIXED_BLOCKS; /* Allocate bitmap to track nodes that have been visited. */ - auto_sbitmap visited (last_basic_block_for_fn (cfun)); + auto_sbitmap visited (last_basic_block_for_fn (fn)); /* None of the nodes in the CFG have been visited yet. */ bitmap_clear (visited); Jakub
Re: [C++ Patch] PR 88986 ("[7/8/9 Regression] ICE: tree check: expected tree that contains 'decl minimal' structure, have 'error_mark' in member_vec_binary_search, at cp/name-lookup.c:1136")
On 2/5/19 4:39 AM, Paolo Carlini wrote: Hi, On 04/02/19 17:48, Jason Merrill wrote: On Mon, Feb 4, 2019 at 11:00 AM Paolo Carlini wrote: On 04/02/19 15:47, Jason Merrill wrote: On 2/1/19 3:52 PM, Paolo Carlini wrote: Hi, I think that this ICE on invalid (and valid, for c++17+) can be in fact avoided by accepting in make_typename_type a TYPE_PACK_EXPANSION as context, thus by not triggering the "‘T ...’ is not a class" error. Not sure if a better fix would be something more general. Note, anyway, that we are asserting TYPE_P (context) thus TYPE_PACK_EXPANSIONs definitely get through beyond MAYBE_CLASS_TYPE_P. The testcase should test that the using actually works, i.e. imports a type from a base class. Uhm, if I change the testcase to something like: struct B { typedef int type; }; template struct C : T... { using typename T::type ...; void f() { type value; } }; template class C; we get a "sorry, unimplemented: use of ‘type_pack_expansion’ in template" for value I suspected that would happen. which, arguably, is better than the current ICE, but I'm not sure if we are close to completing the implementation of this / we even want to attempt that at this Stage?!? Well, I'd look at how we implement the equivalent for e.g. a variable: struct A { static int i; }; template struct B: T... { using T::i ...; }; auto x = B::i; // works and try to use that code path for the typename case as well. Yes. Note that, AFAICS, the code you added to implement P0195R2 works fine for typename too. The problem happens when we actually encounter inside the class something like: void f() { type value; } which, so to speak, appears to expand to a set of types: 'type' is a TYPENAME_TYPE which has a TYPE_PACK_EXPANSION as TYPE_CONTEXT. I wonder about replacing uses of 'type' with a TYPENAME_TYPE with the current class as TYPE_CONTEXT? But your approach is OK, too. Jason
Re: [PATCH] print correct array sizes in errors (PR 87996)
On 2/7/19 1:57 PM, Martin Sebor wrote: + /* The original numeric size as seen in the source code after + any substitution and before conversion to size_t. */ I don't think this should mention substitution. With that tweak the C++ changes are OK. Jason
C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop
Since r268321 we can call digest_init even in a template, when the compound literal isn't instantiation-dependent. Consequently, when we get to case RANGE_FOR_STMT in tsubst_expr, RANGE_FOR_EXPR might already have been digested, as in this case, where before digesting it was {*((struct S *) this)->r} and now it's TARGET_EXPR r}> (that's correct). Now, in tsubst_expr, we recurse on the latter: 17095 expr = RECUR (RANGE_FOR_EXPR (t)); and since the expression contains a COMPONENT_REF, we end up calling finish_non_static_data_member which calls build_class_member_access_expr with preserve_reference=false. Thus, after we've tsubst'd the RANGE_FOR_EXPR, we have TARGET_EXPR r}> Nevermind those casts, but "(struct R *) *((struct S *) this)->r" causes problems later in cp_fold -> cp_convert_to_pointer because we're trying to convert "*((struct S *) this)->r" of type R to R *. That errors and the error_mark_node causes grief. My first attempt was to handle this in tsubst_copy_and_build's case COMPONENT_REF, after the call to finish_non_static_data_member, but that breaks -- we can't have a LHS of a MODIFY_EXPR of a reference type. So it seems this needs to be handled closer to where it actually fails, for instance in ocp_convert. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2019-02-07 Marek Polacek PR c++/89217 - ICE with list-initialization in range-based for loop. * cvt.c (ocp_convert): Unwrap REFERENCE_REF_P in a COMPONENT_REF. * g++.dg/cpp0x/range-for37.C: New test. diff --git gcc/cp/cvt.c gcc/cp/cvt.c index 82a44f353c7..92677cff0c5 100644 --- gcc/cp/cvt.c +++ gcc/cp/cvt.c @@ -857,7 +857,15 @@ ocp_convert (tree type, tree expr, int convtype, int flags, return ignore_overflows (converted, e); } if (INDIRECT_TYPE_P (type) || TYPE_PTRMEM_P (type)) -return cp_convert_to_pointer (type, e, dofold, complain); +{ + /* Undo what finish_non_static_data_member might have done, i.e. +turning e.g. (R *)((S *)this)->r into (R *)*((S *)this)->r, +rendering the conversion invalid. */ + if (REFERENCE_REF_P (e) + && TREE_CODE (TREE_OPERAND (e, 0)) == COMPONENT_REF) + e = TREE_OPERAND (e, 0); + return cp_convert_to_pointer (type, e, dofold, complain); +} if (code == VECTOR_TYPE) { tree in_vtype = TREE_TYPE (e); diff --git gcc/testsuite/g++.dg/cpp0x/range-for37.C gcc/testsuite/g++.dg/cpp0x/range-for37.C new file mode 100644 index 000..d5c7c091d96 --- /dev/null +++ gcc/testsuite/g++.dg/cpp0x/range-for37.C @@ -0,0 +1,24 @@ +// PR c++/89217 +// { dg-do compile { target c++11 } } + +struct R {}; + +struct C +{ +R* begin() const { return &r; } +R* end() const { return &r; } + +R& r; +}; + +struct S +{ +void f1() { f2(); } +R& r; + +template +void f2() +{ +for (auto i : C{r}) {} +} +};
Re: [PATCH] print correct array sizes in errors (PR 87996)
On 2/7/19 4:00 PM, Jason Merrill wrote: On 2/7/19 1:57 PM, Martin Sebor wrote: + /* The original numeric size as seen in the source code after + any substitution and before conversion to size_t. */ I don't think this should mention substitution. With that tweak the C++ changes are OK. Does someone still need to approve the rest? Those changes are trivial: they just move max_object_size from builtins.{c,h} to tree.{c.h}. The change to c-parser.c was accidental and wasn't meant to be included it in the last diff. I attach yet another update with that bit removed (and the tweak comment above). Martin PR c++/87996 - size of array is negative error when SIZE_MAX/2 < sizeof(array) <= SIZE_MAX gcc/ChangeLog: PR c++/87996 * builtins.c (max_object_size): Move from here... * builtins.h (max_object_size): ...and here... * tree.c (max_object_size): ...to here... * tree.h (max_object_size): ...and here. gcc/c-family/ChangeLog: PR c++/87996 * c-common.c (invalid_array_size_error): New function. (valid_array_size_p): Call it. Handle size as well as type. * c-common.h (valid_constant_size_p): New function. (enum cst_size_error): New type. gcc/cp/ChangeLog: PR c++/87996 * decl.c (compute_array_index_type_loc): Preserve signed sizes for diagnostics. Call valid_array_size_p instead of error. * init.c (build_new_1): Compute size for diagnostic. Call invalid_array_size_error (build_new): Call valid_array_size_p instead of error. gcc/testsuite/ChangeLog: PR c++/87996 * c-c++-common/array-5.c: New test. * c-c++-common/pr68107.c: Adjust text of diagnostics. * g++.dg/init/new38.C: Same. * g++.dg/init/new43.C: Same. * g++.dg/init/new44.C: Same. * g++.dg/init/new46.C: Same. * g++.dg/other/large-size-array.C: Same. * g++.dg/other/new-size-type.C: Same. * g++.dg/template/array30.C: Same. * g++.dg/template/array32.C: New test. * g++.dg/template/dependent-name3.C: Adjust. * gcc.dg/large-size-array-3.c: Same. * gcc.dg/large-size-array-5.c: Same. * gcc.dg/large-size-array.c: Same. * g++.old-deja/g++.brendan/array1.C: Same. * g++.old-deja/g++.mike/p6149.C: Same. Index: gcc/builtins.c === --- gcc/builtins.c (revision 268547) +++ gcc/builtins.c (working copy) @@ -11210,12 +11210,3 @@ target_char_cst_p (tree t, char *p) *p = (char)tree_to_uhwi (t); return true; } - -/* Return the maximum object size. */ - -tree -max_object_size (void) -{ - /* To do: Make this a configurable parameter. */ - return TYPE_MAX_VALUE (ptrdiff_type_node); -} Index: gcc/builtins.h === --- gcc/builtins.h (revision 268547) +++ gcc/builtins.h (working copy) @@ -150,6 +150,5 @@ extern internal_fn replacement_internal_fn (gcall extern void warn_string_no_nul (location_t, const char *, tree, tree); extern tree unterminated_array (tree, tree * = NULL, bool * = NULL); -extern tree max_object_size (); #endif /* GCC_BUILTINS_H */ Index: gcc/c-family/c-common.c === --- gcc/c-family/c-common.c (revision 268547) +++ gcc/c-family/c-common.c (working copy) @@ -8231,29 +8231,82 @@ reject_gcc_builtin (const_tree expr, location_t lo return false; } +/* Issue an ERROR for an invalid SIZE of array NAME which is null + for unnamed arrays. */ + +void +invalid_array_size_error (location_t loc, cst_size_error error, + const_tree size, const_tree name) +{ + tree maxsize = max_object_size (); + switch (error) +{ +case cst_size_negative: + if (name) + error_at (loc, "size %qE of array %qE is negative", + size, name); + else + error_at (loc, "size %qE of array is negative", + size); + break; +case cst_size_too_big: + if (name) + error_at (loc, "size %qE of array %qE exceeds maximum " + "object size %qE", size, name, maxsize); + else + error_at (loc, "size %qE of array exceeds maximum " + "object size %qE", size, maxsize); + break; +case cst_size_overflow: + if (name) + error_at (loc, "size of array %qE exceeds maximum " + "object size %qE", name, maxsize); + else + error_at (loc, "size of array exceeds maximum " + "object size %qE", maxsize); + break; +default: + gcc_unreachable (); +} +} + /* Check if array size calculations overflow or if the array covers more than half of the address space. Return true if the size of the array - is valid, false otherwise. TYPE is the type of the array and NAME is - the name of the array, or NULL_TREE for unnamed arrays. */ + is valid, false otherwise. T is either the type of the array or its + size, and NAME is the name of the array, or null for unnamed arrays. */ bool -valid_array_size_p (location_t loc, tree type, tree name, bool complain) +valid_array_size_p (location_t loc, const_tree t, tree name, bool complain) { - if (type != error_mark_node - && COM
Re: [PATCH] Integration of parallel standard algorithms for c++17
> We'll need to add a copy of the LICENSE.TXT file to our sources, since > it's referred to by the comments at the top of each PSTL file. > Right, we were going to discuss where the "right place" under libstdc++-v3/ would be for that. As for the rest, noted, I'll add them to the list for the next go. Jonathan Wakely writes: > On 31/01/19 21:08 -0800, Thomas Rodgers wrote: >>Update C++17 parallel algorithms to LLVM/MIT licensed upstream sources > > Some lines in bits/c++config.h need to be split before 80 columns > (with a backslash if the split is in the middle of a preprocessor > condition obviously). > > There are loads of very long lines in the actual PSTL headers too, but > I think we need to keep them similar to the upstream headers to aid > merging, so can't gratuitously reformat them. I'm assuming that > doesn't apply to since that's our header and any > changes will need to be manually applied anyway, right? > > We'll need to add a copy of the LICENSE.TXT file to our sources, since > it's referred to by the comments at the top of each PSTL file. > > Typo in include/Makefile.am: ${pstl_srcdir}/memoy_impl.h > (Which will require regenerating include/Makefile.in). > > The __cpp_lib_parallel_algorithm macro needs to be (conditionally) > defined in as well as and . > > The namespaces par_backend and unseq_backend need to be uglified. > > ALL the names in the __pstl::internal namespace need to be uglified: > brick_any_of > pattern_any_of > for_each_n_it_serial > brick_walk1 > pattern_walk1 > etc. > > I see quite a few calls to functions that should probably be qualified > to prevent ADL, but that can be fixed later (and upstream first): > > return brick_count(... > return except_handler([&]() { ... > > > The DejaGnu directives in the pstl tests need to be in this order: > > // { dg-options "-std=gnu++17 -ltbb" } > // { dg-do run { target c++17 } } > // { dg-require-effective-target tbb-backend } > > Currently the dg-require-effective-target comes before the dg-do > line, so doesn't work. DejaGnu processes the lines in order. When it > sees the dg-do for target c++17 it overrides the effect of any earlier > dg-require-effective-target. > > I fixed that locally like so: > find testsuite/20_util/specialized_algorithms/pstl/ > testsuite/25_algorithms/pstl/ testsuite/26_numerics/pstl/ -name \*.cc | xargs > sed -i '/dg-require-eff/{h;d};/dg-do/{p;x}' > And now I don't get FAIL results on a system with no TBB installed > (which is great). I also get no FAIL results on a system with TBB > installed (also great). > > > There are two copies (with slight differences) of each of the > uninitialized_xxx tests: > > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_construct.cc > | 128 +++ > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_copy_move.cc > | 143 > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_fill_destroy.cc > | 103 ++ > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_construct.cc >| 132 > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_copy_move.cc >| 154 + > libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_fill_destroy.cc > | 104 ++ > > That's all for now, but I'll keep making passes over it, going into > more detail ...
Re: [PATCH, fortran ieee]: PR 88678, Many gfortran.dg/ieee/ieee_X.f90 test cases fail starting with r267465
On Thu, 2019-01-31 at 08:46 +0100, Uros Bizjak wrote: > On Wed, Jan 30, 2019 at 9:51 PM Janne Blomqvist > > > This seems to change the only user of support_fpu_trap() that is > > different from support_fpu_flag(), so with this change one could > > remove support_fpu_trap() entirely and modify all callers (since > > it's an internal function it's not used outside libgfortran) to > > call support_fpu_flag() directly. Otherwise Ok. > > Some targets only support IEEE flags (so, flags in some FP status > register), but not exceptions (traps) based on these flags that halt > the program. Currently, fpu-glibc.h assumes that all flags can > generate exceptions (that is true for the current set of gfortran > targets), but some future target wants to return 0 from > support_fpu_trap. > > Uros. Is this actually true for all existing gfortran targets? Specifically I am wondering about Arm and Aarch64. PR 78314 says that ARM trapping FPU exceptions are optional. I am currently seeing gfortran.dg/ieee/ieee_6.f90 fail on my Aarch64 ThunderX box. I wonder if we should have an Arm/Aarch64 specific version of the fpu header file (fpu-arm.h) that would use the previous version of support_fpu_trap. Steve Ellcey sell...@marvell.com
[RS6000] Don't support inline PLT for ABI_V4 bss-plt
Inline PLT calls need PLT to be an array of addresses. bss-plt works differently. Bootstrap and regression test on powerpc64-linux biarch in progress. OK assuming no regressions? * config/rs6000/rs6000.c (rs6000_longcall_ref): Don't use inline plt for ABI_V4 bss-plt. (rs6000_call_sysv): Likewise. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 711278c7422..cced90bb518 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -32819,7 +32819,8 @@ rs6000_longcall_ref (rtx call_ref, rtx arg) } if (HAVE_AS_PLTSEQ - && (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4)) + && (DEFAULT_ABI == ABI_ELFv2 + || (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT))) { rtx base = const0_rtx; int regno; @@ -37981,7 +37982,7 @@ rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) func = rs6000_longcall_ref (func_desc, tlsarg); /* If the longcall was implemented using PLT16 relocs, then r11 needs to be valid at the call for lazy linking. */ - if (HAVE_AS_PLTSEQ) + if (HAVE_AS_PLTSEQ && REGNO (func) == 11) abi_reg = func; } @@ -37994,7 +37995,7 @@ rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) calls via LR, so move the address there. Needed to mark this insn for linker plt sequence editing too. */ func_addr = gen_rtx_REG (Pmode, CTR_REGNO); - if (HAVE_AS_PLTSEQ + if (HAVE_AS_PLTSEQ && REGNO (func) == 11 && GET_CODE (func_desc) == SYMBOL_REF) { rtvec v = gen_rtvec (3, func, func_desc, tlsarg); @@ -38051,7 +38052,7 @@ rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) func = rs6000_longcall_ref (func_desc, tlsarg); /* If the longcall was implemented using PLT16 relocs, then r11 needs to be valid at the call for lazy linking. */ - if (HAVE_AS_PLTSEQ) + if (HAVE_AS_PLTSEQ && REGNO (func) == 11) abi_reg = func; } @@ -38063,7 +38064,7 @@ rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) /* Indirect sibcalls must go via CTR. Needed to mark this insn for linker plt sequence editing too. */ func_addr = gen_rtx_REG (Pmode, CTR_REGNO); - if (HAVE_AS_PLTSEQ + if (HAVE_AS_PLTSEQ && REGNO (func) == 11 && GET_CODE (func_desc) == SYMBOL_REF) { rtvec v = gen_rtvec (3, func, func_desc, tlsarg); -- Alan Modra Australia Development Lab, IBM
[RS6000] Correct save_reg_p
Fixes lack of r30 save/restore on // -m32 -fpic -ftls-model=initial-exec __thread char* p; char** f1 (void) { return &p; } and // -m32 -fpic -msecure-plt extern int foo (int); int f1 (int x) { return foo (x); } These are both caused by save_reg_p returning false when the pic offset table reg (r30 for ABI_V4) was used, due to the logic not exactly matching that in rs6000_emit_prologue to set up r30. I've simplified the ABI_V4 logic down to df_regs_ever_live_p since that is obviously correct for all ABIs. I also noticed that save_reg_p isn't following the comment regarding calls_eh_return (since svn 267049, git 0edf78b1b2a0), and the comment needs tweaking too. For why the revised comment is correct, grep for saves_all_registers in lra.c, and yes, we do want to save the pic offset table reg for eh_return. Bootstrap and regression test on powerpc64-linux biarch in progress. OK assuming no regressions? Segher this patch supercedes https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00307.html as there's no need to set crtl->uses_pic_offset_table for those cases now with the df_regs_ever_live_p test in save_reg_p. PR target/88343 * config/rs6000/rs6000.c (save_reg_p): Correct calls_eh_return case. Match logic in rs6000_emit_prologue emitting pic_offset_table setup, simplifying ABI_V4 case to df_regs_ever_live_p. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index cced90bb518..9933d4443f7 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -24013,18 +24013,26 @@ save_reg_p (int reg) if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE) { + if (df_regs_ever_live_p (reg)) + return true; + /* When calling eh_return, we must return true for all the cases where conditional_register_usage marks the PIC offset reg -call used. */ +call used or fixed. */ + if (crtl->calls_eh_return + && ((DEFAULT_ABI == ABI_V4 && flag_pic) + || (DEFAULT_ABI == ABI_DARWIN && flag_pic) + || (TARGET_TOC && TARGET_MINIMAL_TOC))) + return true; + if (TARGET_TOC && TARGET_MINIMAL_TOC - && (crtl->calls_eh_return - || df_regs_ever_live_p (reg) - || !constant_pool_empty_p ())) + && !constant_pool_empty_p ()) return true; - if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) + if (DEFAULT_ABI == ABI_DARWIN && flag_pic && crtl->uses_pic_offset_table) return true; + return false; } return !call_used_regs[reg] && df_regs_ever_live_p (reg); -- Alan Modra Australia Development Lab, IBM
Re: [PATCH] print correct array sizes in errors (PR 87996)
On 2/7/19 6:27 PM, Martin Sebor wrote: On 2/7/19 4:00 PM, Jason Merrill wrote: On 2/7/19 1:57 PM, Martin Sebor wrote: + /* The original numeric size as seen in the source code after + any substitution and before conversion to size_t. */ I don't think this should mention substitution. With that tweak the C++ changes are OK. Does someone still need to approve the rest? Those changes are trivial: they just move max_object_size from builtins.{c,h} to tree.{c.h}. The change to c-parser.c was accidental and wasn't meant to be included it in the last diff. I attach yet another update with that bit removed (and the tweak comment above). OK. Jason
Re: [REVISED PATCH 5/9]: C++ P0482R5 char8_t: Standard library support
On 2/7/19 4:44 AM, Jonathan Wakely wrote: On 23/12/18 21:27 -0500, Tom Honermann wrote: Attached is a revised patch that addresses changes in P0482R6. Changes from the prior patch include: - Updated the value of the __cpp_char8_t feature test macro to 201811. Tested on x86_64-linux. Thanks, Tom, this is great work! The front-end changes for char8_t went in recently, and I'm finally ready to commit the library parts. Great! There's one big problem I found in this patch, which is that the new numeric_limits specialization uses constexpr unconditionally. That fails if is compiled using options like -std=c++98 -fno-char8_t because the specialization will be used, but the constexpr keyword isn't allowed. That's easily fixed by replacing the keyword with _GLIBCXX_CONSTEXPR. Hmm, the code for the char8_t specialization was copied from the char16_t specialization which also uses constexpr unconditionally (but is guarded by a C++11+ requirement). The char8_t specialization must be elided when the compiler is invoked with -std=c++98 -fno-char8_t (since the char8_t type doesn't exist then). The _GLIBCXX_USE_CHAR8_T guard doesn't suffice for this? _GLIBCXX_USE_CHAR8_T should only be defined if __cpp_char8_t is defined; and that should only be defined if -fchar8_t or -std=c++2a is specified. Or perhaps you intended -std=c++98 -fchar8_t? I agree in that case that use of _GLIBCXX_CONSTEXPR is necessary. The other way to solve that problem would be for the compiler to give an error if -fchar8_t is used with C++98, but I see no fundamental reason that combination of options shouldn't be allowed. We can support it in the library by using the macro. Agreed. As discussed in San Diego, the other change needed is to add the abi_tag attribute to the new versions of path::u8string and path::generic_u8string, so that the mangling is different when its return type is different: #ifdef _GLIBCXX_USE_CHAR8_T __attribute__((__abi_tag__("__u8"))) std::u8string u8string() const; #else std::string u8string() const; #endif // _GLIBCXX_USE_CHAR8_T Otherwise we get ODR violations when linking objects compiled with -fchar8_t enabled to objects with it disabled (e.g. linking -std=c++17 objects to -std=c++2a objects, which needs to work). Are ODR violations bad? :) I suggest "__u8" as the name of the ABI tag, but I'm open to other suggestions. "__char8_t" is a bit long and verbose. "__cxx20" would be consistent with "__cxx11" used for the new ABI introduced in GCC 5 but it regularly confuses people who think it is coupled to the -std=c++11 option (and so don't understand why they still see it for -std=c++14). I have no preference or alternative suggestions here. Had I recognized the issue, I would have asked you what to do about it :) Also, I see that you've made changes to (to add the experimental::u8string_view typedef) and to std::experimental::path (to change the return type of u8string and generic_u8string). The former change is fairly harmless; it only adds a typedef, albeit one which is not a reserved name in C++14/C++17 and so should be available for users to define as a macro. Maybe prior to C++2a we should only define it when GNU extensions are enabled (i.e. when using -std=gnu++14 not -std=c++14): #if defined _GLIBCXX_USE_CHAR8_T \ && (__cplusplus > 201703L || !defined __STRICT_ANSI__) using u8string_view = basic_string_view; #endif That makes sense. Changing the return type of experimental::path members concerns me more. That's a published TS which is not going to be revised, and it's not obvious to me that users would want the change in semantics. If somebody is still using the Filesystem TS in C++2a code, they're probably not expecting it to change. If they need to update their code for C++2a they might as well just use std::filesystem, and so having char8_t support in std::experimental::filesystem isn't clearly useful. I agree. I added the support to the experimental implementations more out of a desire to be complete and to remove any potential barriers to use of -fchar8_t than because I felt the changes were really necessary. I would be perfectly fine with skipping the updates to the experimental libraries completely. Tom.
Re: [REVISED PATCH 7/9]: C++ P0482R5 char8_t: New standard library tests
On 2/7/19 4:54 AM, Jonathan Wakely wrote: On 23/12/18 21:27 -0500, Tom Honermann wrote: Attached is a revised patch that addresses changes in P0482R6. Changes from the prior patch include: - Updated the value of the __cpp_char8_t feature test macro to 201811. Tested on x86_64-linux. There are quite a few additional changes needed to make the testsuite pass cleanly with non-default options, e.g. when running it with RUNTESTFLAGS=--target_board=unix/-fchar8_t/-fno-inline I see these failures: I remember thinking that I had to deal with this at one point. It seems I then forgot about it. FAIL: 21_strings/basic_string/literals/types.cc (test for excess errors) FAIL: 21_strings/basic_string/literals/values.cc (test for excess errors) UNRESOLVED: 21_strings/basic_string/literals/values.cc compilation failed to produce executable FAIL: 21_strings/basic_string_view/literals/types.cc (test for excess errors) FAIL: 21_strings/basic_string_view/literals/values.cc (test for excess errors) UNRESOLVED: 21_strings/basic_string_view/literals/values.cc compilation failed to produce executable FAIL: 22_locale/codecvt/char16_t.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/char16_t.cc compilation failed to produce executable FAIL: 22_locale/codecvt/char32_t.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/char32_t.cc compilation failed to produce executable FAIL: 22_locale/codecvt/codecvt_utf8/79980.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/codecvt_utf8/79980.cc compilation failed to produce executable FAIL: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc compilation failed to produce executable FAIL: 22_locale/codecvt/utf8.cc (test for excess errors) UNRESOLVED: 22_locale/codecvt/utf8.cc compilation failed to produce executable FAIL: 22_locale/conversions/string/2.cc (test for excess errors) UNRESOLVED: 22_locale/conversions/string/2.cc compilation failed to produce executable FAIL: 22_locale/conversions/string/3.cc (test for excess errors) UNRESOLVED: 22_locale/conversions/string/3.cc compilation failed to produce executable FAIL: experimental/string_view/literals/types.cc (test for excess errors) FAIL: experimental/string_view/literals/values.cc (test for excess errors) UNRESOLVED: experimental/string_view/literals/values.cc compilation failed to produce executable There would be similar errors running all the tests with -std=c++2a, which is definitely something I do often and so want the tests to be clean. Absolutely, agreed. We can either disable those tests when char8_t is enabled (because we already have alternative tests checking the char8_t versions of string_view etc.) or make them work either way, which the attached patch begins doing (more changes are needed). Since most of these tests exercise functionality that is not u8/char8_t specific, I think we should make them work. I expect a different set of failures for -fno-char8_t (which is probably a less important case to support that enabling char8_t in older standards, but maybe still worth testing now and then). I'm not sure it is less important. -fno-char8_t may be an important tool for some code bases during their initial testing of, and migration to, C++20. Tom.
Re: [PATCH] Fix up pre_and_rev_post_order_compute_fn
On February 7, 2019 11:37:29 PM GMT+01:00, Jakub Jelinek wrote: >Hi! > >I've noticed pre_and_rev_post_order_compute_fn is one of (apparently >many) >functions that take a struct function * argument and accept any >function, as >long as it is cfun. For a patch I've been working on I actually need >it >to handle other functions as well and in this case it is trivial to >fix. >I've seen several others e.g. in graph.c, though some of those are >harder to >deal with, as some there are calls to functions that don't have a >variant >with struct function * argument and assume cfun implicitly. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? EH... OK. Richard. >2019-02-07 Jakub Jelinek > > * cfganal.c (pre_and_rev_post_order_compute_fn): Use fn instead of > cfun everywhere. > >--- gcc/cfganal.c.jj 2019-01-01 12:37:17.915962514 +0100 >+++ gcc/cfganal.c 2019-02-07 20:47:29.035520143 +0100 >@@ -951,10 +951,10 @@ pre_and_rev_post_order_compute_fn (struc > bool include_entry_exit) > { > int pre_order_num = 0; >- int rev_post_order_num = n_basic_blocks_for_fn (cfun) - 1; >+ int rev_post_order_num = n_basic_blocks_for_fn (fn) - 1; > > /* Allocate stack for back-tracking up CFG. */ >- auto_vec stack (n_basic_blocks_for_fn (cfun) + >1); >+ auto_vec stack (n_basic_blocks_for_fn (fn) + 1); > > if (include_entry_exit) > { >@@ -968,7 +968,7 @@ pre_and_rev_post_order_compute_fn (struc > rev_post_order_num -= NUM_FIXED_BLOCKS; > > /* Allocate bitmap to track nodes that have been visited. */ >- auto_sbitmap visited (last_basic_block_for_fn (cfun)); >+ auto_sbitmap visited (last_basic_block_for_fn (fn)); > > /* None of the nodes in the CFG have been visited yet. */ > bitmap_clear (visited); > > Jakub
Re: [C++PATCH] [PR87322] move cp_evaluated up to tsubst all lambda parms
On Feb 7, 2019, Jason Merrill wrote: >> + PR c++/86322. */ > Wrong PR number. Thanks >> + if (local_specializations) >> +if (tree r = retrieve_local_specialization (t)) >> + return r; > Hmm, I would expect this to do the wrong thing for pack expansion of a > lambda, giving us only one rather than one per element of the > expansion. Oooh. Indeed, I hadn't realized this was possible. I think we should separate local_specializations arising from different pack expansion bindings, considering they won't always appear in tsubst args: diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 0177d7f77891..9f954698f454 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -11698,6 +11698,10 @@ gen_elem_of_pack_expansion_instantiation (tree pattern, ARGUMENT_PACK_SELECT_INDEX (aps) = index; } + // Any local specialization bindings arising from this substitution + // cannot be reused for a different INDEX. + local_specialization_stack lss (lss_copy); + /* Substitute into the PATTERN with the (possibly altered) arguments. */ if (pattern == in_decl) > For instance, in lambda-variadic5.C we should get three > instantiations of the "print" function template; can you add that > check to the test? This causes lambda-variadic5.C to get 6 separate lambdas, instead of 2, so it passes after the following patchlet: diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C index 97f64cd761a7..1f931757b72f 100644 --- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C @@ -1,5 +1,7 @@ // PR c++/47226 // { dg-do compile { target c++11 } } +// { dg-options "-fdump-tree-original" } +// { dg-final { scan-tree-dump-times ":: \\(null\\)" 6 "original" } } template void print(const T&) {} Now, should I adjust lambda-variadic5.C to check for the presence of the expencted count of lambdas in compiler dumps? Or should I go for a new test? Thinking of how to test for it drew my attention to issues about lambda naming, that may have implications on ABI and duplicate code removal. There are two issues I'm concerned with: - we increment lambda_cnt when introducing a lambda within a generic, and then increment it again as we specialize, partially or fully, each lambda expr/object/type set. This makes for gaps in numbering of full specializations and, being a global counter, does not ensure the assignment of the same symbol name to corresponding instantiations in separate translation units - we don't seem to attempt to reuse lambdas across different specializations of argument pack expansions. I guess that's ok, we don't attempt to reuse them across different specializations of the enclosing template function or class either, but, unlike enclosing contexts, different argument pack indices do not mandate different mangled names. This came up because I was thinking of having a sub-map in local_specializations, so that the lambda would map to another map in which we could look up template arguments to find a specific instance. That wouldn't work, in part because args does not provide the entire set of substitutions, and in another part because we don't seem to be able to identify, in patterns undergoing substitution but before actually performing the substitution, the set of (implied?) template arguments that might affect the substitution result. To make this concrete: in [&t]{return([]{return(0);}()+t);}... we might be able to reuse the nested lambda across expansions of the enclosing lambda. except that, being nested, their contexts wouldn't allow that. but what if they were siblings, say: ([&t]{return(t);}()+[]{return(0);}())... here the latter is in no way affected by the t pack, so we could create a single lambda, reusing it for all indices in t. The patchlet above will stand in the way of it, but IIUC it doesn't matter, we're not supposed/expected to do that, even when the decision on whether or not to reuse might affect symbol naming, e.g. if it appears in an initializer of a data member. Here's the patch I'm testing. OK if it passes? [PR87322] move cp_evaluated up to tsubst all lambda parms From: Alexandre Oliva A lambda capture variable initialized with a lambda expr taking more than one parameter got us confused. The first problem was that the parameter list was cut short during tsubsting because we tsubsted it with cp_unevaluated_operand. We reset it right after, to tsubst the function body, so I've moved the reset up so that it's in effect while processing the parameters as well. The second problem was that the lambda expr appeared twice, once in a decltype that gave the capture variable its type, and once in its initializer. This caused us to instantiate two separate lambda exprs and closure types, and then to flag that the lambda expr in the initializer could not be conver
Re: [C++PATCH] [PR86379] do not use TREE_TYPE for USING_DECL_SCOPE
On Feb 7, 2019, Jason Merrill wrote: > OK, that makes sense; it isn't always clear what the right handling of > a USING_DECL is. Indeed. Like, in shared_member_p, I'm wondering if we shouldn't recurse if the USING_DECL maps to an overload, that IIUC might contain static and non-static functions, other using decls, and maybe even types. > In protected_accessible_p and shared_member_p, if we're left with a > USING_DECL after strip_using_decl, we can't give a meaningful answer, > and should probably abort; we shouldn't get here with a dependent > expression. Ah, that's good knowledge to have. I've added asserts and comments. Here's what I'm testing. [PR86379] do not use TREE_TYPE for USING_DECL_SCOPE From: Alexandre Oliva It's too risk to reuse the type field for USING_DECL_SCOPE. Language-independent parts of the compiler, such as location and non-lvalue wrappers, happily take the TREE_TYPE of a USING_DECL as if it was a type rather than an unrelated scope. For better or worse, USING_DECLs use the non-common struct so we can use the otherwise unused result field. Adjust fallout, from uses of TREE_TYPE that were supposed to be USING_DECL_SCOPE, to other accidental uses of TREE_TYPE of a USING_DECL. for gcc/cp/ChangeLog PR c++/86379 * cp-tree.h (USING_DECL_SCOPE): Use result rather than type. * name-lookup.c (strip_using_decl): Use USING_DECL_SCOPE. * search.c (protected_accessible_p): Follow USING_DECL_DECLS. (shared_member_p): Likewise. (lookup_member): Likewise. * decl.c (grok_special_member_properties): Skip USING_DECLs. * semantics.c (finish_omp_declare_simd_methods): Likewise. for gcc/testsuite/ChangeLog PR c++/86379 * g++.dg/cpp0x/pr86379.C: New. --- gcc/cp/cp-tree.h |2 gcc/cp/decl.c|3 gcc/cp/name-lookup.c |2 gcc/cp/search.c | 19 +++ gcc/cp/semantics.c |3 gcc/testsuite/g++.dg/cpp0x/pr86379.C | 207 ++ 6 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr86379.C diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index dada3a6aa410..44a3620a539f 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -3293,7 +3293,7 @@ struct GTY(()) lang_decl { #define DECL_DEPENDENT_P(NODE) DECL_LANG_FLAG_0 (USING_DECL_CHECK (NODE)) /* The scope named in a using decl. */ -#define USING_DECL_SCOPE(NODE) TREE_TYPE (USING_DECL_CHECK (NODE)) +#define USING_DECL_SCOPE(NODE) DECL_RESULT_FLD (USING_DECL_CHECK (NODE)) /* The decls named by a using decl. */ #define USING_DECL_DECLS(NODE) DECL_INITIAL (USING_DECL_CHECK (NODE)) diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 65ba812deb67..373b79b844dc 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -13297,7 +13297,8 @@ grok_special_member_properties (tree decl) { tree class_type; - if (!DECL_NONSTATIC_MEMBER_FUNCTION_P (decl)) + if (TREE_CODE (decl) == USING_DECL + || !DECL_NONSTATIC_MEMBER_FUNCTION_P (decl)) return; class_type = DECL_CONTEXT (decl); diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index d7b9029b0a3a..959f43b02384 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -2100,7 +2100,7 @@ strip_using_decl (tree decl) using typename :: [opt] nested-name-specifier unqualified-id ; */ - decl = make_typename_type (TREE_TYPE (decl), + decl = make_typename_type (USING_DECL_SCOPE (decl), DECL_NAME (decl), typename_type, tf_error); if (decl != error_mark_node) diff --git a/gcc/cp/search.c b/gcc/cp/search.c index 0367e4952138..4c3fffda717c 100644 --- a/gcc/cp/search.c +++ b/gcc/cp/search.c @@ -623,6 +623,11 @@ protected_accessible_p (tree decl, tree derived, tree type, tree otype) if (!DERIVED_FROM_P (type, derived)) return 0; + /* DECL_NONSTATIC_MEMBER_P won't work for USING_DECLs. */ + decl = strip_using_decl (decl); + /* We don't expect or support dependent decls. */ + gcc_assert (TREE_CODE (decl) != USING_DECL); + /* [class.protected] When a friend or a member function of a derived class references @@ -928,8 +933,13 @@ shared_member_p (tree t) if (is_overloaded_fn (t)) { for (ovl_iterator iter (get_fns (t)); iter; ++iter) - if (DECL_NONSTATIC_MEMBER_FUNCTION_P (*iter)) - return 0; + { + tree decl = strip_using_decl (*iter); + /* We don't expect or support dependent decls. */ + gcc_assert (TREE_CODE (decl) != USING_DECL); + if (DECL_NONSTATIC_MEMBER_FUNCTION_P (decl)) + return 0; + } return 1; } return 0; @@ -1177,7 +1187,10 @@ lookup_member (tree xbasetype, tree name, int protect, bool want_type, && !really_overloaded_fn (rval)) { tree decl = is_overloaded_fn
[WIP PATCH] Improve tail call analysis and inliner EH clobber through variable life analysis (PR tree-optimization/89060)
Hi! The following patch uses a simple data flow to find live (addressable) variables at certain spots (for tail call discovery at the point of the potential tail call, so that we don't reject tail calls because of variables that can be known out of scope already so that people don't have to find ugly workarounds if they really need tail calls, and for the recently added inline EH pad clobber addition so that we don't add too many variables). Bootstrapped/regtested on x86_64-linux and i686-linux. Haven't gathered statistics on how often does it trigger yet. Shall I use double queue (pending/working sbitmaps) to speed it up (guess I could gather statistics if that helps reasonably)? But if so, perhaps add_scope_conflicts should change too. I haven't shared code with what add_scope_conflicts does because it isn't really that big chunk of code and would need many modifications to make it generic enough to handle add_scope_conflicts needs, plus as I wanted to make it a helper for other optimizations, I didn't want to use bb->aux etc. For tail call, I see another option to compute it unconditionally once at the start of the pass for all may_be_aliased auto_var_in_fn_p vars and then just walk a single bb with each (potential) tail call, instead of computing it for every potential tail call again (on the other side with perhaps smaller set of variables). In that case e.g. vars == NULL argument could imply the VAR_P && may_be_aliased && auto_var_in_fn_p test instead of bitmap_bit_p test, we could drop the stop_after argument and instead export the _1 function renamed to something to walk a single bb (or wrapper to it that would set up the structure) until stop_after. Thoughts on this? 2019-02-08 Jakub Jelinek PR tree-optimization/89060 * tree-ssa-live.h (compute_live_vars, destroy_live_vars): Declare. * tree-ssa-live.c: Include gimple-walk.h and cfganal.h. (struct compute_live_vars_data): New type. (compute_live_vars_visit, compute_live_vars_1, compute_live_vars, destroy_live_vars): New functions. * tree-tailcall.c (find_tail_calls): Perform variable life analysis before punting. * tree-inline.h (struct copy_body_data): Add eh_landing_pad_dest member. * tree-inline.c (add_clobbers_to_eh_landing_pad): Remove BB argument. Perform variable life analysis to select variables that really need clobbers added. (copy_edges_for_bb): Don't call add_clobbers_to_eh_landing_pad here, instead set id->eh_landing_pad_dest and assert it is the same. (copy_cfg_body): Call it here if id->eh_landing_pad_dest is non-NULL. * gcc.dg/tree-ssa/pr89060.c: New test. --- gcc/tree-ssa-live.h.jj 2019-01-01 12:37:16.967978068 +0100 +++ gcc/tree-ssa-live.h 2019-02-07 19:02:58.233530924 +0100 @@ -265,6 +265,8 @@ extern tree_live_info_p calculate_live_r extern void debug (tree_live_info_d &ref); extern void debug (tree_live_info_d *ptr); extern void dump_live_info (FILE *, tree_live_info_p, int); +extern vec compute_live_vars (struct function *, bitmap, gimple *); +extern void destroy_live_vars (vec &); /* Return TRUE if P is marked as a global in LIVE. */ --- gcc/tree-ssa-live.c.jj 2019-01-01 12:37:16.055993032 +0100 +++ gcc/tree-ssa-live.c 2019-02-07 19:34:33.046401259 +0100 @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. #include "stringpool.h" #include "attribs.h" #include "optinfo.h" +#include "gimple-walk.h" +#include "cfganal.h" static void verify_live_on_entry (tree_live_info_p); @@ -1194,8 +1196,142 @@ calculate_live_ranges (var_map map, bool return live; } + +/* Data structure for compute_live_vars* functions. */ +struct compute_live_vars_data { + /* Vector of bitmaps for live vars at the end of basic blocks, + indexed by bb->index. ACTIVE[ENTRY_BLOCK] must be empty bitmap, + ACTIVE[EXIT_BLOCK] is used for STOP_AFTER. */ + vec active; + /* Work bitmap of currently live variables. */ + bitmap work; + /* Bitmap of interesting variables. Variables with uids not in this + bitmap are not tracked. */ + bitmap vars; +}; +/* Callback for walk_stmt_load_store_addr_ops. If OP is a VAR_DECL with + uid set in DATA->vars, enter its uid into bitmap DATA->work. */ + +static bool +compute_live_vars_visit (gimple *, tree op, tree, void *pdata) +{ + compute_live_vars_data *data = (compute_live_vars_data *) pdata; + op = get_base_address (op); + if (op && VAR_P (op) && bitmap_bit_p (data->vars, DECL_UID (op))) +bitmap_set_bit (data->work, DECL_UID (op)); + return false; +} + +/* Helper routine for compute_live_vars, calculating the sets of live + variables at the end of BB, leaving the result in DATA->work. + If STOP_AFTER is non-NULL, stop processing after that stmt. */ + +static void +compute_live_vars_1 (basic_block bb, compute_live_vars_data *data, +gimple *stop_after) +{ + edge e; + edge_iterato
Re: Xfail gfortran.dg/pr79966.f90
On Thu, 7 Feb 2019, Rainer Orth wrote: > Hi Honza, > > > PR ipa/88711 > > * gfortran.dg/pr79966.f90: Xfail for time being. > > Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90 > > === > > --- ../../gcc/testsuite/gfortran.dg/pr79966.f90 (revision 268579) > > +++ ../../gcc/testsuite/gfortran.dg/pr79966.f90 (working copy) > > @@ -108,5 +108,5 @@ contains > > > >call RunTPTests() > >end program > > - > > -! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into > > runtptests/\[0-9\]+" "inline" } } > > +! See PR88711. Inliner is currently not able to figure out that inlining > > tp_sum is a good idea. > > +! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into > > runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } } > > why restrict the xfail to spu only? According to gcc-testresults, the > test currently fails everywhere (aarch64, arm*, hppa*, i?86, ia64, m68k, > powerpc*, riscv64, sparc*, x86_64). OK if you xfail unconditionally on *-*-* Richard.