Re: [FYI] pass --enable-obsolete to build configure

2019-11-03 Thread Andreas Schwab
On Nov 02 2019, Alexandre Oliva wrote:

> @@ -903,6 +904,7 @@ infodir
>  docdir
>  oldincludedir
>  includedir
> +runstatedir
>  localstatedir
>  sharedstatedir
>  sysconfdir

Don't add unreleated changes.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [PATCH v2] [PR91979] Updated the fix:

2019-11-03 Thread Jason Merrill

On 11/1/19 11:47 PM, Kamlesh Kumar wrote:

Changlogs
gcc
--
2019-11-02  Kamlesh Kumar  

 PR c++/91979 - mangling nullptr expression
 * mangle.c (write_template_arg_literal): Handle nullptr
 mangling.
 * testsuite/g++.dg/cpp0x/nullptr27.C: Modify Test.
 * testsuite/g++.dg/cpp0x/pr91979.C: New Test.

libiberty
---
2019-11-02  Kamlesh Kumar  

 * cp-demangle.c (d_expr_primary): Handle
 nullptr demangling.
 * testsuite/demangle-expected: Added test.

gcc/c-family
--
2019-11-02  Kamlesh Kumar  

 * c-opts.c (c_common_post_options): Updated
 latest_abi_version.
---
  gcc/c-family/c-opts.c  |  2 +-
  gcc/cp/mangle.c|  4 +++-
  gcc/testsuite/g++.dg/cpp0x/nullptr27.C |  2 +-
  gcc/testsuite/g++.dg/cpp0x/pr91979.C   | 15 +++
  libiberty/cp-demangle.c| 11 +++
  libiberty/testsuite/demangle-expected  |  4 
  6 files changed, 35 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr91979.C

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 0fffe60b140..d4c77be5cd5 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -937,7 +937,7 @@ c_common_post_options (const char **pfilename)
  
/* Change flag_abi_version to be the actual current ABI level, for the

   benefit of c_cpp_builtins, and to make comparison simpler.  */
-  const int latest_abi_version = 13;
+  const int latest_abi_version = 14;


Please also add a comment about ABI v14 above fabi-version= in 
gcc/common.opt.



/* Generate compatibility aliases for ABI v11 (7.1) by default.  */
const int abi_compat_default = 11;
  
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c

index a9333b84349..234a975781e 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -3400,7 +3400,9 @@ write_template_arg_literal (const tree value)
case INTEGER_CST:
gcc_assert (!same_type_p (TREE_TYPE (value), boolean_type_node)
|| integer_zerop (value) || integer_onep (value));
-   write_integer_cst (value);
+   if (abi_version_at_least(14)
+   && !NULLPTR_TYPE_P (TREE_TYPE (value)))


This logic is wrong: it will never emit the value for ABI version lower 
than 14.  And you need a space before (.



+ write_integer_cst (value);
break;
  
case REAL_CST:

diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
index 2510dc80634..edd11606266 100644
--- a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
@@ -1,7 +1,7 @@
  // PR c++/52706
  // { dg-do compile { target c++11 } }
  // { dg-options "-fabi-version=0" }
-// { dg-final { scan-assembler "_Z1fIDnLDn0EEiT_" } }
+// { dg-final { scan-assembler "_Z1fIDnLDnEEiT_" } }
  
  template

  int f(T);


Please also add a test for the old mangled name with -fabi-version=13.


diff --git a/gcc/testsuite/g++.dg/cpp0x/pr91979.C 
b/gcc/testsuite/g++.dg/cpp0x/pr91979.C
new file mode 100644
index 000..7fcd56b27f0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/pr91979.C
@@ -0,0 +1,15 @@
+// PR c++/91989
+// { dg-do compile { target c++11 } }
+// { dg-final { scan-assembler "_Z3fooILPv0EEvPN9enable_ifIXeqT_LDnEEvE4typeE" 
} }
+
+template 
+struct enable_if {};
+
+template 
+struct enable_if { typedef T type; };
+
+template 
+void foo(typename enable_if::type* = 0) {}
+
+template void foo<(void *)0>(void *);
+
diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index 5b674d7d93c..3fb1c56409e 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -3577,6 +3577,17 @@ d_expr_primary (struct d_info *di)
  && type->u.s_builtin.type->print != D_PRINT_DEFAULT)
di->expansion -= type->u.s_builtin.type->len;
  
+  if (type->type == DEMANGLE_COMPONENT_BUILTIN_TYPE

+ && strncmp (type->u.s_builtin.type->name,
+ cplus_demangle_builtin_types[33].name, 17) == 0)


Let's use strcmp rather than hardcode 17 here.


+   {
+ if (d_peek_char (di) == 'E')
+   {
+ d_advance (di, 1);
+ return type;
+   }
+   }
+
/* Rather than try to interpret the literal value, we just
 collect it as a string.  Note that it's possible to have a
 floating point literal here.  The ABI specifies that the
diff --git a/libiberty/testsuite/demangle-expected 
b/libiberty/testsuite/demangle-expected
index 61681484d0e..f68a8a71aaf 100644
--- a/libiberty/testsuite/demangle-expected
+++ b/libiberty/testsuite/demangle-expected
@@ -1446,3 +1446,7 @@ Foo()::X::fn
  _ZZZ3FooIiEfvENKUlT_E_clIcEEDaS0_EN1X2fnEv
  Foo()::{lambda(auto:1)#1}::operator()(char) const::X::fn()
  Foo()::{lambda(auto:1)#1}::operator()(char) const::X::fn
+#PR91979 demangling nullptr expression
+
+_Z3fooILPv0EEvPN9enable_ifIXeqT_LDnEEvE4typeE
+void foo<(void*)0>(enable_if<((void*)0)==((declty

[Darwin, testsuite, committed] Fix PR 79274

2019-11-03 Thread Iain Sandoe
The solution for initialising global TLS variables does not apply
to platforms using emulated TLS.   On later branches this test requires
native TLS.  Rather than make that change (especially late on gcc-7)
I’ve  XFAILed the test for this on Darwin.

tested on x86_64-darwin16, powerpc-darwin9, x86_64-linux-gnu
applied to gcc-8 and gcc-7 branches
thanks
Iain

gcc/testsuite/

2019-11-03  Iain Sandoe  

PR c++/79274
* g++.dg/tls/pr77285-2.C: XFAIL test for Darwin.

diff --git a/gcc/testsuite/g++.dg/tls/pr77285-2.C 
b/gcc/testsuite/g++.dg/tls/pr77285-2.C
index bac273a4d6..459ecc6555 100644
--- a/gcc/testsuite/g++.dg/tls/pr77285-2.C
+++ b/gcc/testsuite/g++.dg/tls/pr77285-2.C
@@ -3,7 +3,7 @@
 // { dg-require-effective-target tls }
 // { dg-final { scan-assembler "_Z4var1B3tag" } }
 // { dg-final { scan-assembler "_Z4var2B3tag" } }
-// { dg-final { scan-assembler "_ZTH4var1B3tag" } }
+// { dg-final { scan-assembler "_ZTH4var1B3tag" { xfail *-*-darwin* } } }
 // { dg-final { scan-assembler "_ZTW4var1B3tag" } }
 
 struct __attribute__((abi_tag("tag"))) X { ~X () {} int i = 0; };



[PATCH v3] Updated the Fix:

2019-11-03 Thread Kamlesh Kumar
ChangeLog Entries:

gcc/cp
--
2019-11-02  Kamlesh Kumar  

PR c++/91979 - mangling nullptr expression
* cp/mangle.c (write_template_arg_literal): Handle nullptr
mangling.

gcc
--
2019-11-02  Kamlesh Kumar  

* common.opt (-fabi-version): Added Description.
* testsuite/g++.dg/cpp0x/nullptr27.C: Modify Test.
* testsuite/g++.dg/cpp0x/nullptr43.C: New Test.
* testsuite/g++.dg/cpp0x/nullptr44.C: New Test.

libiberty
---
2019-11-02  Kamlesh Kumar  

* cp-demangle.c (d_expr_primary): Handle
nullptr demangling.
* testsuite/demangle-expected: Added test.

gcc/c-family
--
2019-11-02  Kamlesh Kumar  

* c-opts.c (c_common_post_options): Updated
latest_abi_version.
---
 gcc/c-family/c-opts.c  |  2 +-
 gcc/common.opt |  2 ++
 gcc/cp/mangle.c|  4 +++-
 gcc/testsuite/g++.dg/cpp0x/nullptr27.C |  2 +-
 gcc/testsuite/g++.dg/cpp0x/nullptr43.C |  9 +
 gcc/testsuite/g++.dg/cpp0x/nullptr44.C | 15 +++
 libiberty/cp-demangle.c| 11 +++
 libiberty/testsuite/demangle-expected  |  4 
 8 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nullptr43.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/nullptr44.C

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 0fffe60b140..d4c77be5cd5 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -937,7 +937,7 @@ c_common_post_options (const char **pfilename)
 
   /* Change flag_abi_version to be the actual current ABI level, for the
  benefit of c_cpp_builtins, and to make comparison simpler.  */
-  const int latest_abi_version = 13;
+  const int latest_abi_version = 14;
   /* Generate compatibility aliases for ABI v11 (7.1) by default.  */
   const int abi_compat_default = 11;
 
diff --git a/gcc/common.opt b/gcc/common.opt
index cc279f411d7..fdd923e3c35 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -951,6 +951,8 @@ Driver Undocumented
 ; 13: Fixes the accidental change in 12 to the calling convention for classes
 ; with deleted copy constructor and trivial move constructor.
 ; Default in G++ 8.2.
+; 14: Corrects the mangling of nullptr expression.
+; Default in G++ 10.
 ;
 ; Additional positive integers will be assigned as new versions of
 ; the ABI become the default version of the ABI.
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index a9333b84349..a32ff2a2210 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -3400,7 +3400,9 @@ write_template_arg_literal (const tree value)
   case INTEGER_CST:
gcc_assert (!same_type_p (TREE_TYPE (value), boolean_type_node)
|| integer_zerop (value) || integer_onep (value));
-   write_integer_cst (value);
+   if (!(abi_version_at_least (14)
+   && NULLPTR_TYPE_P (TREE_TYPE (value
+ write_integer_cst (value);
break;
 
   case REAL_CST:
diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
index 2510dc80634..edd11606266 100644
--- a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
@@ -1,7 +1,7 @@
 // PR c++/52706
 // { dg-do compile { target c++11 } }
 // { dg-options "-fabi-version=0" }
-// { dg-final { scan-assembler "_Z1fIDnLDn0EEiT_" } }
+// { dg-final { scan-assembler "_Z1fIDnLDnEEiT_" } }
 
 template
 int f(T);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr43.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr43.C
new file mode 100644
index 000..fbdb6cd8e9d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr43.C
@@ -0,0 +1,9 @@
+// PR c++/91979
+// { dg-do compile { target c++11 } }
+// { dg-options "-fabi-version=13" }
+// { dg-final { scan-assembler "_Z1fIDnLDn0EEiT_" } }
+
+template
+int f(T);
+
+int i2 = f(nullptr);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr44.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr44.C
new file mode 100644
index 000..9ceba14fc98
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr44.C
@@ -0,0 +1,15 @@
+// PR c++/91979
+// { dg-do compile { target c++11 } }
+// { dg-final { scan-assembler "_Z3fooILPv0EEvPN9enable_ifIXeqT_LDnEEvE4typeE" 
} }
+
+template 
+struct enable_if {};
+
+template 
+struct enable_if { typedef T type; };
+
+template 
+void foo(typename enable_if::type* = 0) {}
+
+template void foo<(void *)0>(void *);
+
diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index 5b674d7d93c..3150efff80d 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -3577,6 +3577,17 @@ d_expr_primary (struct d_info *di)
  && type->u.s_builtin.type->print != D_PRINT_DEFAULT)
di->expansion -= type->u.s_builtin.type->len;
 
+  if (type->type == DEMANGLE_COMPONENT_BUILTIN_TYPE
+ && strcmp (type->u.s_builtin.type->name,
+ cplus_demangle_builtin_types[33].name) == 0)
+   {
+ if (d_

Re: [RFH][libgcc] fp-bit bit ordering (PR 78804)

2019-11-03 Thread Oleg Endo
On Fri, 2019-10-11 at 23:27 +0900, Oleg Endo wrote:
> On Thu, 2019-10-03 at 19:34 -0600, Jeff Law wrote:
> > 
> > So probably the most interesting target for this test is v850-elf
> > as
> > it's got a reasonably well functioning simulator, hard and soft FP
> > targets, little endian, and I'm familiar with its current set of
> > failures.
> > 
> > I can confirm that your patch makes no difference in the test
> > results
> > (which includes execution results).
> > 
> > In fact, there haven't been any problems on any target in my tester
> > that
> > I can tie back to this change.
> > 
> > At this point I'd say let's go for it.
> > 
> 
> Thanks, Jeff.  I'll commit it to trunk if there are no further
> objections some time next week.
> 

I've just committed it as r277752.

Personally I'd like to install it on GCC 8 and 9 branches as well.
Any thoughts on that?

Cheers,
Oleg




Re: [PATCH][RFC] C++-style iterators for FOR_EACH_IMM_USE_STMT

2019-11-03 Thread Oleg Endo
On Wed, 2019-10-30 at 10:27 +0100, Richard Biener wrote:
> 
> Hmm, not sure - I'd like to write
> 
>  for (gimple *use_stmt : imm_stmt_uses (SSAVAR))
>for (use_operand_p use_p :  from 
> above>)
>  ...
> 
> I don't see how that's possible.  It would need to be "awkward" like
> 
>  for (auto it : imm_stmt_uses (SSAVAR))
>{
>  gimple *use_stmt = *it;
>  for (use_operand_p use_p : it)
>...
>}
> 
> so the first loops iteration object are the actual iterator and you'd
> have to do extra indirection to get at the actual stmt you iterated
> to.
> 
> So I'd extend C++ (hah) to allow
> 
>   for (gimple *use_stmt : imm_stmt_uses (SSAVAR))
> for (use_operand_p use_p : auto)
>   ...
> 
> where 'auto' magically selects the next iterator object in scope
> [that matches].
> 
> ;)

Have you applied for a patent yet? :D

How about this one?

for (gimple* use_stmt : imm_stmt_uses (SSAVAR))
  for (use_operand_p use_p : imm_uses_on_stmt (*use_stmt))

... where helper function "imm_uses_on_stmt" returns a range object
that offers a begin and end function and its own iterator type.


Another concept that could be interesting are filter iterators.

We used a simplistic re-implementation (c++03) to avoid dragging in
boost when working on AMS
https://github.com/erikvarga/gcc/blob/master/gcc/filter_iterator.h

Example uses are
https://github.com/erikvarga/gcc/blob/master/gcc/ams.h#L845
https://github.com/erikvarga/gcc/blob/master/gcc/ams.cc#L3715


I think there are also some places in RTL where filter iterators could
be used, e.g. "iterate over all MEMs in an RTL" could be made to look
something like that:

  for (auto&& i : filter_rtl (my_rtl_obj, MEM_P))
   ...


Anyway, maybe it can plant some ideas.

Cheers,
Oleg



Introduce ipa_inline_call_context class

2019-11-03 Thread Jan Hubicka
Hi,
this patch refactors estimate_node_size_and_time so we can store the
context call size/time estimates was calculated for.  The goal is to
prevent recalculation of these properties for calls from different call
sites but with same context and instead cache the results.

Honza

Bootstrapped/regtested x86_64-linux, will commit it shortly.

* ipa-fnsummary.c (ipa_call_context): New constructor.
(estimate_node_size_and_time): Turn to ...
(ipa_call_context::estimate_size_and_time): ... this one.
(ipa_call_context::release): New.
* ipa-fnsummary.h (ipa_call_context): New class.
(estimate_node_size_and_time): Remove.
(do_estimate_edge_time, do_estimate_edge_size,
do_estimate_edge_hints): Update.
===
--- ipa-fnsummary.c (revision 277753)
+++ ipa-fnsummary.c (working copy)
@@ -2940,31 +2940,54 @@ estimate_calls_size_and_time (struct cgr
 }
 }
 
+/* Default constructor for ipa call context.
+   Memory alloction of known_vals, known_contexts
+   and known_aggs vectors is owned by the caller, but can
+   be release by ipa_call_context::release.  
+   
+   inline_param_summary is owned by the caller.  */
+ipa_call_context::ipa_call_context (cgraph_node *node,
+   clause_t possible_truths,
+   clause_t nonspec_possible_truths,
+   vec known_vals,
+   vec
+known_contexts,
+   vec known_aggs,
+   vec
+inline_param_summary)
+: m_node (node), m_possible_truths (possible_truths),
+  m_nonspec_possible_truths (nonspec_possible_truths),
+  m_inline_param_summary (inline_param_summary),
+  m_known_vals (known_vals),
+  m_known_contexts (known_contexts),
+  m_known_aggs (known_aggs)
+{
+}
+
+/* Release memory used by known_vals/contexts/aggs vectors.  */
+
+void
+ipa_call_context::release ()
+{
+  m_known_vals.release ();
+  m_known_contexts.release ();
+  m_known_aggs.release ();
+}
 
-/* Estimate size and time needed to execute NODE assuming
-   POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
-   information about NODE's arguments.  If non-NULL use also probability
-   information present in INLINE_PARAM_SUMMARY vector.
+/* Estimate size and time needed to execute call in the given context.
Additionally detemine hints determined by the context.  Finally compute
minimal size needed for the call that is independent on the call context and
can be used for fast estimates.  Return the values in RET_SIZE,
RET_MIN_SIZE, RET_TIME and RET_HINTS.  */
 
 void
-estimate_node_size_and_time (struct cgraph_node *node,
-clause_t possible_truths,
-clause_t nonspec_possible_truths,
-vec known_vals,
-vec known_contexts,
-vec known_aggs,
-int *ret_size, int *ret_min_size,
-sreal *ret_time,
-sreal *ret_nonspecialized_time,
-ipa_hints *ret_hints,
-vec
-inline_param_summary)
+ipa_call_context::estimate_size_and_time (int *ret_size,
+ int *ret_min_size,
+ sreal *ret_time,
+ sreal *ret_nonspecialized_time,
+ ipa_hints *ret_hints)
 {
-  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
+  class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node);
   size_time_entry *e;
   int size = 0;
   sreal time = 0;
@@ -2976,13 +2999,13 @@ estimate_node_size_and_time (struct cgra
 {
   bool found = false;
   fprintf (dump_file, "   Estimating body: %s/%i\n"
-  "   Known to be false: ", node->name (),
-  node->order);
+  "   Known to be false: ", m_node->name (),
+  m_node->order);
 
   for (i = predicate::not_inlined_condition;
   i < (predicate::first_dynamic_condition
+ (int) vec_safe_length (info->conds)); i++)
-   if (!(possible_truths & (1 << i)))
+   if (!(m_possible_truths & (1 << i)))
  {
if (found)
  fprintf (dump_file, ", ");
@@ -2991,19 +3014,19 @@ estimate_node_size_and_time (struct cgra
  }
 }
 
-  estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, 
possible_truths,
-   known_vals, known_contexts, known_aggs);
+  estimate_calls_size_and_time (m_node, &size, &min_size, &time, &hints, 
m_possible_truths,
+   m_known

Re: [PATCH 1/2] [ARM,testsuite] Skip tests incompatible with -mpure-code

2019-11-03 Thread Christophe Lyon
Ping?

https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01357.html


On Fri, 18 Oct 2019 at 15:18, Christophe Lyon
 wrote:
>
> Hi,
>
> All these tests fail when using -mpure-code:
> * some force A or R profile
> * some use Neon
> * some use -fpic/-fPIC
> all of which are not supported by this option.
>
> OK?
>
> Thanks,
>
> Christophe


Re: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

2019-11-03 Thread Christophe Lyon
Ping?

https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01356.html

On Fri, 18 Oct 2019 at 15:18, Christophe Lyon
 wrote:
>
> Hi,
>
> This patch extends support for -mpure-code to all thumb-1 processors,
> by removing the need for MOVT.
>
> Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
> lower0_7 relocations, and constants are built using sequences of
> movs/adds and lsls instructions.
>
> The extension of the *thumb1_movhf pattern uses always the same size
> (6) although it can emit a shorter sequence when possible. This is
> similar to what *arm32_movhf already does.
>
> CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
> generating invalid assembly code with differences from symbols from
> two different sections (the difference cannot be computed by the
> assembler).
>
> Tests pr45701-[12].c needed a small adjustment to avoid matching
> upper8_15 when looking for the r8 register.
>
> Test no-literal-pool.c is augmented with __fp16, so it now uses
> -mfp16-format=ieee.
>
> Test thumb1-Os-mult.c generates an inline code sequence with
> -mpure-code and computes the multiplication by using a sequence of
> add/shift rather than using the multiply instruction, so we skip it in
> presence of -mpure-code.
>
> With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
> code like:
> static char *p = "Hello World";
> char *
> testchar ()
> {
>   return p + 4;
> }
> generates 2 indirections (I removed non-essential directives/code)
> .section.rodata
> .LC0:
> .ascii  "Hello World\000"
> .data
> p:
> .word   .LC0
> .section.rodata
> .LC2:
> .word   p
> .section .text,"0x2006",%progbits
> testchar:
> push{r7, lr}
> add r7, sp, #0
> movsr3, #:upper8_15:#.LC2
> lslsr3, #8
> addsr3, #:upper0_7:#.LC2
> lslsr3, #8
> addsr3, #:lower8_15:#.LC2
> lslsr3, #8
> addsr3, #:lower0_7:#.LC2
> ldr r3, [r3]
> ldr r3, [r3]
> addsr3, r3, #4
> movsr0, r3
> mov sp, r7
> @ sp needed
> pop {r7, pc}
>
> By contrast, when using -mcpu=cortex-m4, the code looks like:
> .section.rodata
> .LC0:
> .ascii  "Hello World\000"
> .data
> p:
> .word   .LC0
> testchar:
> push{r7}
> add r7, sp, #0
> movwr3, #:lower16:p
> movtr3, #:upper16:p
> ldr r3, [r3]
> addsr3, r3, #4
> mov r0, r3
> mov sp, r7
> pop {r7}
> bx  lr
>
> I haven't found yet how to make code for cortex-m0 apply upper/lower
> relocations to "p" instead of .LC2. The current code looks functional,
> but could be improved.
>
> OK as-is?
>
> Thanks,
>
> Christophe


Optimize streaming in inline summaries

2019-11-03 Thread Jan Hubicka
Hi,
this patch makes inline_read_sections to allocate vector in proper size
saving bit of register scaling overhead.

Bootstrapped/regtested x86_64-linux, comitted.

* ipa-fnsummary.c (inline_read_section): Set vector size
ahead of time.

Index: ipa-fnsummary.c
===
--- ipa-fnsummary.c (revision 277754)
+++ ipa-fnsummary.c (working copy)
@@ -3614,6 +3614,8 @@ inline_read_section (struct lto_file_dec
 
   count2 = streamer_read_uhwi (&ib);
   gcc_assert (!info || !info->conds);
+  if (info)
+vec_safe_reserve_exact (info->conds, count2);
   for (j = 0; j < count2; j++)
{
  struct condition c;
@@ -3627,8 +3629,10 @@ inline_read_section (struct lto_file_dec
  c.by_ref = bp_unpack_value (&bp, 1);
  if (c.agg_contents)
c.offset = streamer_read_uhwi (&ib);
- c.param_ops = NULL;
  count3 = streamer_read_uhwi (&ib);
+ c.param_ops = NULL;
+ if (info)
+   vec_safe_reserve_exact (c.param_ops, count3);
  for (k = 0; k < count3; k++)
{
  struct expr_eval_op op;
@@ -3658,13 +3662,16 @@ inline_read_section (struct lto_file_dec
  fatal_error (UNKNOWN_LOCATION,
   "invalid fnsummary in LTO stream");
}
- vec_safe_push (c.param_ops, op);
+ if (info)
+   c.param_ops->quick_push (op);
}
  if (info)
-   vec_safe_push (info->conds, c);
+   info->conds->quick_push (c);
}
   count2 = streamer_read_uhwi (&ib);
   gcc_assert (!info || !info->size_time_table);
+  if (info && count2)
+vec_safe_reserve_exact (info->size_time_table, count2);
   for (j = 0; j < count2; j++)
{
  class size_time_entry e;
@@ -3675,7 +3682,7 @@ inline_read_section (struct lto_file_dec
  e.nonconst_predicate.stream_in (&ib);
 
  if (info)
-   vec_safe_push (info->size_time_table, e);
+   info->size_time_table->quick_push (e);
}
 
   p.stream_in (&ib);


Implement ipa_inline_call_context cache

2019-11-03 Thread Jan Hubicka
Hi,
this patch implements simple single entry LRU cache for computing size/time of
callgraph edges.   My original patch contained a hashtable for all contexts but
since I became more careful about memory allocation I decided to first go with
this simple patch.  For Firefox I get:

node context cache: 1266834 hits, 871097 misses, 345654 initializations

So we save about 51% of calculations which are the usual bottlenecks of the
greedy inliner. Calculating the estimate involves walking all edges out of
the given function and since we compute it for each caller, this may get
quadratic.

In the followup patches I will increase the effectivity of cache by dropping
unnecesary information and then I will evaluate what size of cahce works best.

Current perf profile of WPA on firefox looks as follows:
   2.98%  lto1-wpa-stream  libc-2.24.so   [.] _int_malloc
   2.75%  lto1-wpa-stream  lto1   [.] streamer_tree_cache_lookup
   2.16%  lto1-wpa libc-2.24.so   [.] _int_malloc
   2.13%  lto1-wpa lto1   [.] inflate_fast
   1.96%  lto1-wpa lto1   [.] record_target_from_binfo
   1.90%  lto1-wpa lto1   [.] types_same_for_odr
   1.37%  lto1-wpa lto1   [.] estimate_calls_size_and_time
   1.37%  lto1-wpa lto1   [.] 
symbol_table::remove_unreachable_nodes
   1.23%  lto1-wpa-stream  lto1   [.] streamer_write_uhwi_stream
   1.07%  lto1-wpa lto1   [.] type_possibly_instantiated_p
   1.07%  lto1-wpa libc-2.24.so   [.] malloc_consolidate
   1.02%  lto1-wpa-stream  lto1   [.] DFS::DFS
   1.02%  lto1-wpa lto1   [.] unify_scc
   1.02%  lto1-wpa lto1   [.] sreal::operator/
   1.01%  lto1-wpa-stream  [kernel.kallsyms]  [k] copy_page
   0.97%  lto1-wpa lto1   [.] fibonacci_heap::consolidate
   0.89%  lto1-wpa lto1   [.] evaluate_properties_for_edge
   0.81%  lto1-wpa [kernel.kallsyms]  [k] copy_page_range
   0.81%  lto1-wpa lto1   [.] splay_tree_splay.part.0
   0.80%  lto1-wpa-stream  lto1   [.] lto_output_tree
   0.79%  lto1-wpa lto1   [.] streamer_read_uhwi
   0.75%  lto1-wpa lto1   [.] 
gimple_get_virt_method_for_vtable
   0.74%  lto1-wpa lto1   [.] edge_badness
   0.74%  lto1-wpa libc-2.24.so   [.] int_mallinfo
   0.74%  lto1-wpa lto1   [.] rename_statics
   0.74%  lto1-wpa lto1   [.] 
possible_polymorphic_call_targets_1
   0.69%  lto1-wpa lto1   [.] compare_tree_sccs_1
   0.67%  lto1-wpa libc-2.24.so   [.] _int_free
   0.65%  lto1-wpa lto1   [.] fibonacci_heap::extract_minimum_node
   0.61%  lto1-wpa lto1   [.] htab_hash_string
   0.58%  lto1-wpa lto1   [.] ht_lookup_with_hash
   0.58%  lto1-wpa-stream  lto1   [.] linemap_ordinary_map_lookup
   0.57%  lto1-wpa lto1   [.] streamer_read_tree_bitfields
   0.56%  lto1-wpa lto1   [.] cgraph_node::get_availability
   0.56%  lto1-wpa lto1   [.] 
hash_table, edge_growth_cache_entry*, 
simple_hashmap_traits >, edge_gr

Cutting down record_target_from_binfo and types_same_for_odr is relatively easy
(we do not need to walk may edges in unreachable function removal each time).
estimate_calls_size_and_time was reduced by this patch (it used to be 1st of
lto1-wpa).


Bootstrapped/regtested x86_64-linux, will commit it shortly.

Honza
* ipa-fnsummary.c (ipa_call_context::duplicate_from): New
member function.
(ipa_call_context::release): Add ALL parameter.
(ipa_call_context::equal_to): New member function.
* ipa-fnsummary.h (ipa_call_context): Add empty constructor;
duplicate_form, release, equal_to and exists_p member functoins.
* ipa-inline-analysis.c (node_context_cache_entry): New
class.
(node_context_summary): Likewise.
(node_context_cache, node_context_cache_hit, node_context_cache_miss,
node_context_clear): New static vars.
(initialize_growth_caches): New function.
(free_growth_caches): Also delete node_context_cache; output stats.
(do_estimate_edge_time): Cache contexts.
(reset_node_cache): New function.
* ipa-inline.c (reset_edge_caches): Reset also node cache.
(inline_small_functions): Initialize growth caches.
* ipa-inline.h (reset_node_cache, initialize_growth_caches):
Declare.
* ipa-predicate.h (inline_param_summary::equal_to): New.
* ipa-prop.c (ipa_agg_jf_item::equal_to): New.
* ipa-prop.h (ipa_agg_jf_item): Declare equal_to member function.
(ipa_agg_jump_function): Implement equal_to member function.

Index: ipa-fnsummary.c
==

[patch, fortran] PR92123 - [F2018/array-descriptor] Scalar allocatable/pointer with array descriptor (via bind(C)): ICE with select rank or error scalar variable with POINTER or ALLOCATABLE in procedu

2019-11-03 Thread Paul Richard Thomas
The attached patch is verging on the obvious. Thanks to Tobias for
spotting Vipul's messages on the J3 list.

Regtests on FC30/x86_64 - OK for trunk and 9-branch?

Paul

2019-11-03  Paul Thomas  

PR fortran/92123
*decl.c (gfc_verify_c_interop_param): Remove error asserting
that pointer or allocatable variables in a bind C procedure are
not supported. Delete some trailing spaces.
* trans-stmt.c (trans_associate_var): Correct the attempt to
treat scalar pointer or allocatable temporaries as if they are
array descriptors.

2019-11-03  Paul Thomas  

PR fortran/92123
* gfortran.dg/bind_c_procs_3.f90 : New test.
* gfortran.dg/ISO_Fortran_binding_15.c : New test.
* gfortran.dg/ISO_Fortran_binding_15.f90 : Additional source.
Index: gcc/fortran/decl.c
===
*** gcc/fortran/decl.c	(revision 277531)
--- gcc/fortran/decl.c	(working copy)
*** gfc_verify_c_interop_param (gfc_symbol *
*** 1560,1574 
    sym->ns->proc_name->name))
  	retval = false;
  
- 	  if ((sym->attr.allocatable || sym->attr.pointer) && !sym->as)
- 	{
- 	  gfc_error ("Scalar variable %qs at %L with POINTER or "
- 			 "ALLOCATABLE in procedure %qs with BIND(C) is not yet"
- 			 " supported", sym->name, &(sym->declared_at),
- 			 sym->ns->proc_name->name);
- 	  retval = false;
- 	}
- 
  	  if (sym->attr.optional == 1 && sym->attr.value)
  	{
  	  gfc_error ("Variable %qs at %L cannot have both the OPTIONAL "
--- 1560,1565 
*** gfc_match_entry (void)
*** 7547,7553 
entry->attr.is_bind_c = 0;
  
loc = entry->old_symbol != NULL
! 	? entry->old_symbol->declared_at : gfc_current_locus; 
gfc_error_now ("BIND(C) attribute at %L can only be used for "
  		 "variables or common blocks", &loc);
   }
--- 7538,7544 
entry->attr.is_bind_c = 0;
  
loc = entry->old_symbol != NULL
! 	? entry->old_symbol->declared_at : gfc_current_locus;
gfc_error_now ("BIND(C) attribute at %L can only be used for "
  		 "variables or common blocks", &loc);
   }
*** gfc_match_derived_decl (void)
*** 10288,10294 
  }
  
/*  In free source form, need to check for TYPE XXX as oppose to TYPEXXX.
!   But, we need to simply return for TYPE(.  */ 
if (m == MATCH_NO && gfc_current_form == FORM_FREE)
  {
char c = gfc_peek_ascii_char ();
--- 10279,10285 
  }
  
/*  In free source form, need to check for TYPE XXX as oppose to TYPEXXX.
!   But, we need to simply return for TYPE(.  */
if (m == MATCH_NO && gfc_current_form == FORM_FREE)
  {
char c = gfc_peek_ascii_char ();
Index: gcc/fortran/trans-stmt.c
===
*** gcc/fortran/trans-stmt.c	(revision 277531)
--- gcc/fortran/trans-stmt.c	(working copy)
*** trans_associate_var (gfc_symbol *sym, gf
*** 1841,1850 
  	  if (rank > 0)
  	copy_descriptor (&se.post, se.expr, desc, rank);
  	  else
! 	{
! 	  tmp = gfc_conv_descriptor_data_get (desc);
! 	  gfc_conv_descriptor_data_set (&se.post, se.expr, tmp);
! 	}
  
  	  /* The dynamic type could have changed too.  */
  	  if (sym->ts.type == BT_CLASS)
--- 1841,1847 
  	  if (rank > 0)
  	copy_descriptor (&se.post, se.expr, desc, rank);
  	  else
! 	gfc_conv_descriptor_data_set (&se.post, se.expr, desc);
  
  	  /* The dynamic type could have changed too.  */
  	  if (sym->ts.type == BT_CLASS)
Index: gcc/testsuite/gfortran.dg/bind_c_procs_3.f90
===
*** gcc/testsuite/gfortran.dg/bind_c_procs_3.f90	(nonexistent)
--- gcc/testsuite/gfortran.dg/bind_c_procs_3.f90	(working copy)
***
*** 0 
--- 1,25 
+ ! { dg-do run }
+ !
+ ! Test the fix for PR92123, in which 'dat' caused an error with the message
+ ! "Scalar variable 'dat' at ?? with POINTER or ALLOCATABLE in procedure Fsub
+ ! with BIND(C) is not yet supported."
+ !
+ ! Contributed by Vipul Parekh  
+ !
+ module m
+use, intrinsic :: iso_c_binding, only : c_int
+ contains
+subroutine Fsub( dat ) bind(C, name="Fsub")
+   !.. Argument list
+   integer(c_int), allocatable, intent(out) :: dat
+   dat = 42
+   return
+end subroutine
+ end module m
+ 
+use, intrinsic :: iso_c_binding, only : c_int
+use m, only : Fsub
+integer(c_int), allocatable :: x
+call Fsub( x )
+if (x .ne. 42) stop 1
+ end
Index: gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c
===
*** gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c	(nonexistent)
--- gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c	(working copy)
***
*** 0 
--- 1,41 
+ /* Test the fix for PR92123.  */
+ 
+ /* Contributed by Vipul Parekh*/
+ 
+ #include 
+ #include 
+ #include "../../../l

Re: [patch, fortran] PR92123 - [F2018/array-descriptor] Scalar allocatable/pointer with array descriptor (via bind(C)): ICE with select rank or error scalar variable with POINTER or ALLOCATABLE in pro

2019-11-03 Thread Steve Kargl


On Sun, Nov 03, 2019 at 06:36:59PM +, Paul Richard Thomas wrote:
> The attached patch is verging on the obvious. Thanks to Tobias for
> spotting Vipul's messages on the J3 list.
> 
> Regtests on FC30/x86_64 - OK for trunk and 9-branch?
> 

OK for both.

-- 
Steve


Re: [patch, fortran] Fix PR 92113

2019-11-03 Thread Steve Kargl
On Sat, Nov 02, 2019 at 10:38:32AM +0100, Thomas Koenig wrote:
> 
> the attached patch fixes an 8/9/10 regression where, to fix PR 84487
> by not putting the initializers and vtabs into the read-only section
> (for reasons of size, which could grow enormously) led to a regression
> on POWER9 and other non-x86 architectures, where the initializer was
> sometimes optimized away, depending on optimization levels.
> 
> This was a strange beast to hunt down. This only showed up on
> the testresults for gcc 8, so I tried to find out what commit
> had fixed this on trunk, in order to backport.
> 
> However, bisecting this I found that the test case actually
> segfaults all the way up to current trunk when run by hand.
> By running the testsuite, I didn't see it.  This is strange,
> and raises some issues about the testsuite and the possibility
> of a latent issue, but I lack the knowledge to hunt this down.
> 
> In the meantime, here is this patch, which puts the vtabs and
> the initializers where the user actually specified something
> into the read-only section again.
> 
> Test case: Well, theoretically it is already there, so it makes
> little sense to add a new one.
> 
> Regression-tested on powerpc64le-unknown-linux-gnu, also
> verified by hand that pr51434.f90 now passes with -O2 there.
> 
> OK for trunk/9/8?

OK for all three.

It is, as you have indicated, troublesome that a segfaulting
testcase isn't caught by the testsuite.

-- 
Steve


Re: [Patch, Fortran] PR90374 Support d0.d, e0.d, es0.d, en0.d, g0.d

2019-11-03 Thread Steve Kargl
On Fri, Nov 01, 2019 at 03:48:04PM -0700, Jerry DeLisle wrote:
> 
> The attached patch provides frontend and runtime modifications to allow the 
> subject format specifiers. These are allowed as default behavior and under 
> -std=f2018.
> 
> It does not implement the ew.de0 specifier. I decided to do that part 
> separarately since it involves different places in the code.
> 
> I will to a Changlog for the testsuite changes. In summary:
> 
> modified: fmt_error_10.f to allow it to pass.
> modified: fmt_error_7.f likewise.
> modified: fmt_error_9.f likewise.
> new file: fmt_zero_width.f90 to test the new features.
> 
> Regression tested on x86_64-pc-linux-gnu.
> 
> OK for trunk?
> 

OK.

-- 


Re: [patch, fortran] Fix PR 92113

2019-11-03 Thread Thomas Koenig

Hi Steve,


OK for trunk/9/8?


OK for all three.


Thanks, committed to trunk as r277760.

I'll be AFK for a few days, so I will have to wait before
committing this to gcc-9. Given the convoluted history of
this bug, this might not be a bad thing.

> It is, as you have indicated, troublesome that a segfaulting
> testcase isn't caught by the testsuite.

It certainly is, but I have no solution for this at the moment.

Thanks for the review!

Regards

Thomas





Improve effectivity of ipa_context_cache

2019-11-03 Thread Jan Hubicka
Hi,
this, somewhat verbose, patch make ipa-context-cache to only store the
information that is actually used by the size/time estimation. In
particular we now track what parameters are used in ipa predicates,
what parameters are used as targets of indirect calls and what
parameters are used for polymorphic calls.

So we have 3 new flags or every parameter of function:

  unsigned used_by_ipa_predicates : 1;
  unsigned used_by_indirect_call : 1;
  unsigned used_by_polymorphic_call : 1;

This greatly improves effectivity of the cache (from 51% to 75%) and
saves a lot of allocations because only info needed is actually stored
and compared for equality.

As followup I also plan to track what parameters are used by jump
function and drop useless jump function prior inlining. That should save
some memory and avoid extra legwork updating the info while we know it
is not being useful.

Martin, I would welcome double-checking that I did not missed any
update.  Basically the flags are:

 1) set by functions recording the indirect call and recording the
condition for predicates
 2) updated by functions manipulating these once function is inlined
 3) set by stream-in code (rather than being streamed as well, since it
is trivial to re-compute).

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* ipa-fnsummary.c (set_cond_stmt_execution_predicate,
set_switch_stmt_execution_predicate, compute_bb_predicates,
will_be_nonconstant_expr_predicate,
phi_result_unknown_predicate,
analyze_function_body): Pass arround params summary.
(ipa_call_context::duplicate_from): New comment;
only duplicate useful values.
(ipa_call_context::equal_to): Only compare useful values.
(remap_edge_summaries): Pass params_summary.
(remap_hint_predicate): Likewise.
(ipa_merge_fn_summary_after_inlining): Likewise.
(inline_read_section): Initialize params summary used flags.
* ipa-predicate.c (predicate::remap_after_inlining): Pass
around param_summary.
(add_condition): Initialized used params summary flags.
* ipa-predicate.h (inline_param_summary::equals_to): Make const.
(inline_param_summary::useless_p): New predicate.
(remap_after_inlining, add_condition): Update prototype
* ipa-prop.c (ipa_populate_param_decls): Watch overflow in
move_cost.
(ipa_note_param_call): Add parameter POLYMORPHIC; update params
summaries.
(ipa_analyze_indirect_call_uses): Update use of ipa_note_param_call.
(ipa_analyze_virtual_call_uses): Likewise.
(update_indirect_edges_after_inlining): Update param summaries.
(ipa_print_node_params): Print used flags.
(ipa_read_indirect_edge_info): Update param summareis.
* ipa-prop.h (ipa_param_descriptor): Add
used_by_ipa_predicates, used_by_indirect_call
and used_by_polymorphic_call.
(ipa_set_param_used_by_ipa_predicates,
ipa_set_param_used_by_indirect_call,
ipa_set_param_used_by_polymorphic_call,
ipa_is_param_used_by_ipa_predicates,
ipa_is_param_used_by_indirect_call,
ipa_is_param_used_by_polymorphic_call): New inline functions.
Index: ipa-fnsummary.c
===
--- ipa-fnsummary.c (revision 277757)
+++ ipa-fnsummary.c (working copy)
@@ -1312,6 +1312,7 @@ fail:
 static void
 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
   class ipa_fn_summary *summary,
+  class ipa_node_params *params_summary,
   basic_block bb)
 {
   gimple *last;
@@ -1354,7 +1355,8 @@ set_cond_stmt_execution_predicate (struc
  && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
{
  predicate p
-   = add_condition (summary, index, param_type, &aggpos,
+   = add_condition (summary, params_summary, index,
+param_type, &aggpos,
 this_code, gimple_cond_rhs (last), param_ops);
  e->aux = edge_predicate_pool.allocate ();
  *(predicate *) e->aux = p;
@@ -1387,7 +1389,8 @@ set_cond_stmt_execution_predicate (struc
 return;
   FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
 {
-  predicate p = add_condition (summary, index, param_type, &aggpos,
+  predicate p = add_condition (summary, params_summary, index,
+  param_type, &aggpos,
   predicate::is_not_constant, NULL_TREE);
   e->aux = edge_predicate_pool.allocate ();
   *(predicate *) e->aux = p;
@@ -1401,6 +1404,7 @@ set_cond_stmt_execution_predicate (struc
 static void
 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
 class ipa_fn_summary *

Re: [PATCH] combine: Don't generate IF_THEN_ELSE

2019-11-03 Thread Andrew Pinski
On Thu, May 9, 2019 at 5:05 PM Segher Boessenkool
 wrote:
>
> On all targets I managed to test (21) this results in better code.  Only
> alpha ends up with slightly bigger code.
>
> Committing to trunk.

This introduced:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92342

Thanks,
Andrew Pinski

>
>
> Segher
>
>
> 2019-05-10  Segher Boessenkool  
>
> * combine.c (combine_simplify_rtx): Don't make IF_THEN_ELSE RTL.
>
> ---
>  gcc/combine.c | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index 7b236225..8c4375f 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -5937,14 +5937,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, 
> int in_dest,
>  mode, 
> VOIDmode,
>  cond, cop1),
> mode);
> - else
> -   return gen_rtx_IF_THEN_ELSE (mode,
> -simplify_gen_relational 
> (cond_code,
> - mode,
> - 
> VOIDmode,
> - cond,
> - cop1),
> -true_rtx, false_rtx);
>
>   code = GET_CODE (x);
>   op0_mode = VOIDmode;
> --
> 1.8.3.1
>


Re: [PR47785] COLLECT_AS_OPTIONS

2019-11-03 Thread Kugan Vivekanandarajah
Thanks for the reviews.


On Sat, 2 Nov 2019 at 02:49, H.J. Lu  wrote:
>
> On Thu, Oct 31, 2019 at 6:33 PM Kugan Vivekanandarajah
>  wrote:
> >
> > On Wed, 30 Oct 2019 at 03:11, H.J. Lu  wrote:
> > >
> > > On Sun, Oct 27, 2019 at 6:33 PM Kugan Vivekanandarajah
> > >  wrote:
> > > >
> > > > Hi Richard,
> > > >
> > > > Thanks for the review.
> > > >
> > > > On Wed, 23 Oct 2019 at 23:07, Richard Biener 
> > > >  wrote:
> > > > >
> > > > > On Mon, Oct 21, 2019 at 10:04 AM Kugan Vivekanandarajah
> > > > >  wrote:
> > > > > >
> > > > > > Hi Richard,
> > > > > >
> > > > > > Thanks for the pointers.
> > > > > >
> > > > > >
> > > > > >
> > > > > > On Fri, 11 Oct 2019 at 22:33, Richard Biener 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Fri, Oct 11, 2019 at 6:15 AM Kugan Vivekanandarajah
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > Hi Richard,
> > > > > > > > Thanks for the review.
> > > > > > > >
> > > > > > > > On Wed, 2 Oct 2019 at 20:41, Richard Biener 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Wed, Oct 2, 2019 at 10:39 AM Kugan Vivekanandarajah
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > Hi,
> > > > > > > > > >
> > > > > > > > > > As mentioned in the PR, attached patch adds 
> > > > > > > > > > COLLECT_AS_OPTIONS for
> > > > > > > > > > passing assembler options specified with -Wa, to the 
> > > > > > > > > > link-time driver.
> > > > > > > > > >
> > > > > > > > > > The proposed solution only works for uniform -Wa options 
> > > > > > > > > > across all
> > > > > > > > > > TUs. As mentioned by Richard Biener, supporting non-uniform 
> > > > > > > > > > -Wa flags
> > > > > > > > > > would require either adjusting partitioning according to 
> > > > > > > > > > flags or
> > > > > > > > > > emitting multiple object files  from a single LTRANS CU. We 
> > > > > > > > > > could
> > > > > > > > > > consider this as a follow up.
> > > > > > > > > >
> > > > > > > > > > Bootstrapped and regression tests on  arm-linux-gcc. Is 
> > > > > > > > > > this OK for trunk?
> > > > > > > > >
> > > > > > > > > While it works for your simple cases it is unlikely to work 
> > > > > > > > > in practice since
> > > > > > > > > your implementation needs the assembler options be present at 
> > > > > > > > > the link
> > > > > > > > > command line.  I agree that this might be the way for people 
> > > > > > > > > to go when
> > > > > > > > > they face the issue but then it needs to be documented 
> > > > > > > > > somewhere
> > > > > > > > > in the manual.
> > > > > > > > >
> > > > > > > > > That is, with COLLECT_AS_OPTION (why singular?  I'd expected
> > > > > > > > > COLLECT_AS_OPTIONS) available to cc1 we could stream this 
> > > > > > > > > string
> > > > > > > > > to lto_options and re-materialize it at link time (and 
> > > > > > > > > diagnose mismatches
> > > > > > > > > even if we like).
> > > > > > > > OK. I will try to implement this. So the idea is if we provide
> > > > > > > > -Wa,options as part of the lto compile, this should be available
> > > > > > > > during link time. Like in:
> > > > > > > >
> > > > > > > > arm-linux-gnueabihf-gcc -march=armv7-a -mthumb -O2 -flto
> > > > > > > > -Wa,-mimplicit-it=always,-mthumb -c test.c
> > > > > > > > arm-linux-gnueabihf-gcc  -flto  test.o
> > > > > > > >
> > > > > > > > I am not sure where should we stream this. Currently, 
> > > > > > > > cl_optimization
> > > > > > > > has all the optimization flag provided for compiler and it is
> > > > > > > > autogenerated and all the flags are integer values. Do you have 
> > > > > > > > any
> > > > > > > > preference or example where this should be done.
> > > > > > >
> > > > > > > In lto_write_options, I'd simply append the contents of 
> > > > > > > COLLECT_AS_OPTIONS
> > > > > > > (with -Wa, prepended to each of them), then recover them in 
> > > > > > > lto-wrapper
> > > > > > > for each TU and pass them down to the LTRANS compiles (if they 
> > > > > > > agree
> > > > > > > for all TUs, otherwise I'd warn and drop them).
> > > > > >
> > > > > > Attached patch streams it and also make sure that the options are 
> > > > > > the
> > > > > > same for all the TUs. Maybe it is a bit restrictive.
> > > > > >
> > > > > > What is the best place to document COLLECT_AS_OPTIONS. We don't seem
> > > > > > to document COLLECT_GCC_OPTIONS anywhere ?
> > > > >
> > > > > Nowhere, it's an implementation detail then.
> > > > >
> > > > > > Attached patch passes regression and also fixes the original ARM
> > > > > > kernel build issue with tumb2.
> > > > >
> > > > > Did you try this with multiple assembler options?  I see you stream
> > > > > them as -Wa,-mfpu=xyz,-mthumb but then compare the whole
> > > > > option strings so a mismatch with -Wa,-mthumb,-mfpu=xyz would be
> > > > > diagnosed.  If there's a spec induced -Wa option do we get to see
> > > > > that as well?  I can imagine -march=xyz enabling a -Wa option
> > > > > for example.
> > > > >
> > > > > + *collect_as = XNEWVEC (cha

[PATCH v2] PR92090: Fix testcase failures by r276469

2019-11-03 Thread luoxhu
-finline-functions is enabled by default for O2 since r276469, update the
test cases with -fno-inline-functions.

v2: disable inlining for the failed cases.  Add two more failed cases
not listed in BZ.  Tested on P8LE, P8BE and P9LE.


gcc/testsuite/ChangeLog:

2019-10-30  Xiong Hu Luo  

PR92090
* g++.dg/tree-ssa/pr61034.C: Add -fno-inline-functions --param
max-inline-insns-single-O2=200.
* gcc.dg/atomic/c11-atomic-exec-5.c: Likewise.
* gcc.target/powerpc/pr72804.c: Likewie.
* gcc.target/powerpc/pr79439-1.c: Add -fno-inline-functions.
* gcc.target/powerpc/vsx-builtin-7.c: Likewise.
---
 gcc/testsuite/g++.dg/tree-ssa/pr61034.C  | 2 +-
 gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/pr72804.c   | 2 +-
 gcc/testsuite/gcc.target/powerpc/pr79439-1.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
index 2e3dfecacb4..338de1ebb13 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized 
-fdelete-null-pointer-checks --param early-inlining-insns-O2=14" }
+// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized 
-fdelete-null-pointer-checks --param early-inlining-insns-O2=14 
-fno-inline-functions --param max-inline-insns-single-O2=200"  }
 
 #define assume(x) if(!(x))__builtin_unreachable()
 
diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c 
b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
index 692c64ad207..0d70a769fdd 100644
--- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
+++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
@@ -3,7 +3,7 @@
iterations of the compare-and-exchange loop are needed, exceptions
get properly cleared).  */
 /* { dg-do run } */
-/* { dg-options "-std=c11 -pedantic-errors -pthread -U_POSIX_C_SOURCE 
-D_POSIX_C_SOURCE=200809L" } */
+/* { dg-options "-std=c11 -pedantic-errors -pthread -U_POSIX_C_SOURCE 
-D_POSIX_C_SOURCE=200809L -fno-inline-functions --param 
max-inline-insns-single-O2=200" } */
 /* { dg-add-options ieee } */
 /* { dg-additional-options "-mfp-trap-mode=sui" { target alpha*-*-* } } */
 /* { dg-additional-options "-D_XOPEN_SOURCE=600" { target *-*-solaris2* } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr72804.c 
b/gcc/testsuite/gcc.target/powerpc/pr72804.c
index b83b6350d75..0fc3df1d89b 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr72804.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr72804.c
@@ -1,6 +1,6 @@
 /* { dg-do compile { target { lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-require-effective-target powerpc_vsx_ok  -fno-inline-functions --param 
max-inline-insns-single-O2=200 } */
 /* { dg-options "-O2 -mvsx" } */
 
 __int128_t
diff --git a/gcc/testsuite/gcc.target/powerpc/pr79439-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr79439-1.c
index 5732a236c8e..539c96f571e 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr79439-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr79439-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
-/* { dg-options "-O2 -fpic -fno-reorder-blocks" } */
+/* { dg-options "-O2 -fpic -fno-reorder-blocks -fno-inline-functions" } */
 
 /* On the Linux 64-bit ABIs, we eliminate NOP in the 'rec' call even if
-fpic is used.  The recursive call should call the local alias.  The
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
index 5d31309f272..0780b01ffab 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
@@ -1,7 +1,7 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mdejagnu-cpu=power7" } */
+/* { dg-options "-O2 -mdejagnu-cpu=power7 -fno-inline-functions" } */
 
 /* Test simple extract/insert/slat operations.  Make sure all types are
supported with various options.  */
-- 
2.21.0.777.g83232e3864




Re: [PATCH] Add explicit description for -finline

2019-11-03 Thread luoxhu
On 2019/11/2 00:23, Joseph Myers wrote:
> On Thu, 31 Oct 2019, Xiong Hu Luo wrote:
> 
>> +@code{-finline} enables inlining of function declared \"inline\".
>> +@code{-finline} is enabled at levels -O1, -O2, -O3 and -Os, but not -Og.
> 
> Use @option{} to mark up option names (both -finline and all the -O
> options in this paragraph).  Use @code{} to mark up keyword names, not
> \"\".
> 

Thanks.  So shall I commit the tiny patch with below updates?

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1407d019d14..ea0d407fe11 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -8576,6 +8576,10 @@ optimizing.
 Single functions can be exempted from inlining by marking them
 with the @code{noinline} attribute.

+@option{-finline} enables inlining of function declared @code{inline}.
+@option{-finline} is enabled at levels @option{-O1}, @option{-O2}, @option{-O3}
+and @option{-Os}, but not @option{-Og}.
+
 @item -finline-small-functions
 @opindex finline-small-functions
 Integrate functions into their callers when their body is smaller than expected



[PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-03 Thread Jiufu Guo
Hi,

In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
target related hueristic adjustment in this hook. In this patch, small loops
is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
some improvement for spec2017.  This patch enhanced a little for [Patch V2] to
enable small loops unroll for O3 by default like O2.

Bootstrapped and regtested on powerpc64le.  Is this ok for trunk?

Jiufu
BR.

gcc/
2019-11-04  Jiufu Guo   

PR tree-optimization/88760
* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
code which changes PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
(rs6000_loop_unroll_adjust): New hook for loop unroll adjust.
Unrolling small loop 2 times for -O2 and -O3.
(rs6000_function_specific_save): Save unroll_small_loops flag.
(rs6000_function_specific_restore): Restore unroll_small_loops flag.
* gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.


gcc.testsuite/
2019-11-04  Jiufu Guo  

PR tree-optimization/88760
* gcc.dg/pr59643.c: Update back to r277550.

---
 gcc/config/rs6000/rs6000.c | 38 --
 gcc/config/rs6000/rs6000.opt   |  7 +++
 gcc/testsuite/gcc.dg/pr59643.c |  3 ---
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9ed5151..5e1a75d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1428,6 +1428,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,25 +4543,20 @@ rs6000_option_override_internal (bool global_init_p)
 global_options.x_param_values,
 global_options_set.x_param_values);
 
-  /* unroll very small loops 2 time if no -funroll-loops.  */
+  /* If funroll-loops is not enabled explicitly, then enable small loops
+unrolling for -O2, and do not turn fweb or frename-registers on.  */
   if (!global_options_set.x_flag_unroll_loops
  && !global_options_set.x_flag_unroll_all_loops)
{
- maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
-global_options.x_param_values,
-global_options_set.x_param_values);
-
- maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
-global_options.x_param_values,
-global_options_set.x_param_values);
+ unroll_small_loops = optimize >= 2 ? 1 : 0;
 
- /* If fweb or frename-registers are not specificed in command-line,
-do not turn them on implicitly.  */
  if (!global_options_set.x_flag_web)
global_options.x_flag_web = 0;
  if (!global_options_set.x_flag_rename_registers)
global_options.x_flag_rename_registers = 0;
}
+  else
+   unroll_small_loops = 0;
 
   /* If using typedef char *va_list, signal that
 __builtin_va_start (&ap, 0) can be optimized to
@@ -5101,6 +5099,24 @@ rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/*  Implement targetm.loop_unroll_adjust.  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
+{
+  if (unroll_small_loops)
+{
+  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+example we may want to unroll very small loops more times (4 perhaps).
+We also should use a PARAM for this.  */
+  if (loop->ninsns <= 10)
+   return MIN (2, nunroll);
+  else
+   return 0;
+}
+  return nunroll;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics.  */
 
@@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct cl_target_option 
*ptr,
 {
   ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
   ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
+  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
 }
 
 /* Restore the current options */
@@ -23483,6 +23500,7 @@ rs6000_function_specific_restore (struct gcc_options 
*opts,
 {
   opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
   opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
+  opts->x_unroll_small_loops = ptr->x_unroll_small_loops;
   (void) rs6000_option_override_internal (false);
 }
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1f37a92..9cd5b4e 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ 

[PATCH, rs6000] Make load cost more in vectorization cost for P8/P9

2019-11-03 Thread Kewen.Lin
Hi,

To align with rs6000_insn_cost costing more for load type insns,
this patch is to make load insns cost more in vectorization cost
function.  Considering that the result of load usually is used
somehow later (true-dep) but store won't, we keep the store as
before.

The SPEC2017 performance evaluation on Power8 shows 525.x264_r
+9.56%, 511.povray_r +2.08%, 527.cam4_r 1.16% gains, no 
significant degradation, SPECINT geomean +0.88%, SPECFP geomean
+0.26%.

The SPEC2017 performance evaluation on Power9 shows no significant
improvement or degradation, SPECINT geomean +0.04%, SPECFP geomean
+0.04%.

The SPEC2006 performance evaluation on Power8 shows 454.calculix
+4.41% gain but 416.gamess -1.19% and 453.povray -3.83% degradation.
I looked into the two degradation bmks, the degradation were NOT
due to hotspot changes by vectorization, were all side effects.
SPECINT geomean +0.10%, SPECFP geomean no changed considering
the degradation.

Bootstrapped and regress tested on powerpc64le-linux-gnu.  
Is OK for trunk?


BR,
Kewen

---

gcc/ChangeLog

2019-11-04  Kewen Lin  

* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Make
scalar_load, vector_load, unaligned_load and vector_gather_load cost
a bit more on Power8 and up.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5876714..876c7ef 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4763,15 +4763,22 @@ rs6000_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
   switch (type_of_cost)
 {
   case scalar_stmt:
-  case scalar_load:
   case scalar_store:
   case vector_stmt:
-  case vector_load:
   case vector_store:
   case vec_to_scalar:
   case scalar_to_vec:
   case cond_branch_not_taken:
 return 1;
+  case scalar_load:
+  case vector_load:
+   /* Like rs6000_insn_cost, make load insns cost a bit more. FIXME: the
+  benefits were observed on Power8 and up, we can unify it if similar
+  profits are measured on Power6 and Power7.  */
+   if (TARGET_P8_VECTOR)
+ return 2;
+   else
+ return 1;
 
   case vec_perm:
/* Power7 has only one permute unit, make it a bit expensive.  */
@@ -4792,8 +4799,9 @@ rs6000_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
 
   case unaligned_load:
   case vector_gather_load:
+   /* Like rs6000_insn_cost, make load insns cost a bit more.  */
if (TARGET_EFFICIENT_UNALIGNED_VSX)
- return 1;
+ return 2;
 
 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
   {
@@ -4827,7 +4835,13 @@ rs6000_builtin_vectorization_cost (enum 
vect_cost_for_stmt type_of_cost,
   /* Misaligned loads are not supported.  */
   gcc_unreachable ();
 
-return 2;
+   /* Like rs6000_insn_cost, make load insns cost a bit more. FIXME: the
+  benefits were observed on Power8 and up, we can unify it if similar
+  profits are measured on Power6 and Power7.  */
+   if (TARGET_P8_VECTOR)
+ return 4;
+   else
+ return 2;
 
   case unaligned_store:
   case vector_scatter_store: