[PATCH] c++: Fix ICE with __builtin_bit_cast [PR98469]

2020-12-30 Thread Jakub Jelinek via Gcc-patches
Hi!

On the following testcase we ICE during constexpr evaluation (done for
warnings), because the IL has an ADDR_EXPR of a BIT_CAST_EXPR and the
ADDR_EXPR case asserts the result is not a CONSTRUCTOR.
I tried to force a temporary for those in call.c next to:
if (convs->need_temporary_p
|| TREE_CODE (expr) == CONSTRUCTOR
|| TREE_CODE (expr) == VA_ARG_EXPR)
but that resulted in a lot of ICEs, so this patch just punts on lval
evaluation of BIT_CAST_EXPR instead.  Normally __builtin_bit_cast is called
from std::bit_cast, which is constexpr, so the BIT_CAST_EXPR isn't
evaluated there during parsing or tsubst; and when evaluating the call to
std::bit_cast, the NRV-optimized return value is assigned to some temporary
or variable, so BIT_CAST_EXPR is not evaluated as an lval.
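
For reference, a minimal sketch of the usual wrapper (my assumption about
its shape; the real libstdc++ std::bit_cast adds constraints -- compare the
bit-cast9.C test below):

  template <typename To, typename From>
  constexpr To
  bit_cast (const From &from) noexcept
  {
    return __builtin_bit_cast (To, from);
  }

Because the wrapper is itself constexpr and returns by value, the built-in
only ever gets evaluated as an rvalue on that path.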

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-30  Jakub Jelinek  

PR c++/98469
* constexpr.c (cxx_eval_constant_expression) <case BIT_CAST_EXPR>:
Punt if lval is true.

* g++.dg/cpp2a/bit-cast8.C: New test.
* g++.dg/cpp2a/bit-cast9.C: New test.

--- gcc/cp/constexpr.c.jj   2020-12-23 22:44:05.398093175 +0100
+++ gcc/cp/constexpr.c  2020-12-29 10:32:44.865030881 +0100
@@ -6900,6 +6900,15 @@ cxx_eval_constant_expression (const cons
   return t;
 
 case BIT_CAST_EXPR:
+  if (lval)
+   {
+ if (!ctx->quiet)
+   error_at (EXPR_LOCATION (t),
+ "address of a call to %qs is not a constant expression",
+ "__builtin_bit_cast");
+ *non_constant_p = true;
+ return t;
+   }
   r = cxx_eval_bit_cast (ctx, t, non_constant_p, overflow_p);
   break;
 
--- gcc/testsuite/g++.dg/cpp2a/bit-cast8.C.jj   2020-12-29 10:35:31.547140723 +0100
+++ gcc/testsuite/g++.dg/cpp2a/bit-cast8.C  2020-12-29 10:34:26.431879120 +0100
@@ -0,0 +1,11 @@
+// PR c++/98469
+// { dg-do compile { target c++20 } }
+// { dg-options "-Wall" }
+
+struct S { int s; };
+
+S
+foo ()
+{
+  return __builtin_bit_cast (S, 0);
+}
--- gcc/testsuite/g++.dg/cpp2a/bit-cast9.C.jj   2020-12-29 10:35:35.018101365 +0100
+++ gcc/testsuite/g++.dg/cpp2a/bit-cast9.C  2020-12-29 10:35:05.905431494 +0100
@@ -0,0 +1,15 @@
+// PR c++/98469
+// { dg-do compile { target c++20 } }
+// { dg-options "-Wall" }
+
+template <typename T, typename F>
+constexpr T
+bit_cast (const F &f) noexcept
+{
+  return __builtin_bit_cast (T, f);
+}
+struct S { int s; };
+constexpr int foo (const S &x) { return x.s; }
+constexpr int bar () { return foo (bit_cast<S> (0)); }
+constexpr int x = bar ();
+static_assert (!x);

Jakub



[PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]

2020-12-30 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch adds an optimization mentioned in PR56719 #c8.
We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
and x != -1 && y != -1 && z != -1 into (x & y & z) != -1
optimizations; this patch just extends that to optimize
x < C && y < C && z < C for power-of-two constants C into
(x | y | z) < C (for unsigned comparisons).

I didn't want to create too many buckets (there can be TYPE_PRECISION such
constants), so the patch instead just uses one bucket for all such
constants and loops over that bucket up to TYPE_PRECISION times.
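
As a quick illustration (my example, not from the patch), for unsigned
operands and the power-of-two bound 0x2000:

  int
  in_range (unsigned x, unsigned y, unsigned z)
  {
    return x < 0x2000 && y < 0x2000 && z < 0x2000;
    /* reassoc can now fold this to:  return (x | y | z) < 0x2000;  */
  }

This is valid because x < 2^k for unsigned x just says all bits at and
above bit k are zero, and ORing the operands tests that for all of them
at once.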

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-30  Jakub Jelinek  

PR tree-optimization/56719
* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
x < C && y < C && z < C when C is a power of two constant into
(x | y | z) < C.

* gcc.dg/tree-ssa/pr56719.c: New test.

--- gcc/tree-ssa-reassoc.c.jj   2020-12-01 13:19:12.859127403 +0100
+++ gcc/tree-ssa-reassoc.c  2020-12-29 12:42:23.432102952 +0100
@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
 }
 
 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
-   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.  */
+   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
+   Also, handle x < C && y < C && z < C where C is power of two as
+   (x | y | z) < C.  */
 
 static bool
 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
 
   for (i = first; i < length; i++)
 {
+  int idx;
+
   if (ranges[i].exp == NULL_TREE
  || TREE_CODE (ranges[i].exp) != SSA_NAME
  || !ranges[i].in_p
  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
- || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
- || ranges[i].low == NULL_TREE
- || ranges[i].low != ranges[i].high)
+ || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
continue;
 
-  bool zero_p = integer_zerop (ranges[i].low);
-  if (!zero_p && !integer_all_onesp (ranges[i].low))
+  if (ranges[i].low != NULL_TREE
+ && ranges[i].high != NULL_TREE
+ && tree_int_cst_equal (ranges[i].low, ranges[i].high))
+   {
+ idx = !integer_zerop (ranges[i].low);
+ if (idx && !integer_all_onesp (ranges[i].low))
+   continue;
+   }
+  else if (ranges[i].high != NULL_TREE
+  && TREE_CODE (ranges[i].high) == INTEGER_CST)
+   {
+ wide_int w = wi::to_wide (ranges[i].high);
+ int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
+ int l = wi::clz (w);
+ idx = 2;
+ if (l <= 0
+ || l >= prec
+ || w != wi::mask (prec - l, false, prec))
+   continue;
+ if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
+&& ranges[i].low == NULL_TREE)
+   || (ranges[i].low
+   && integer_zerop (ranges[i].low))))
+   continue;
+   }
+  else
continue;
 
-  b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
+  b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
   if (buckets.length () <= b)
buckets.safe_grow_cleared (b + 1, true);
   if (chains.length () <= (unsigned) i)
@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
 if (i && chains[i - 1])
   {
int j, k = i;
+   if ((b % 3) == 2)
+ {
+   /* When ranges[X - 1].high + 1 is a power of two,
+  we need to process the same bucket up to
+  precision - 1 times, each time split the entries
+  with the same high bound into one chain and the
+  rest into another one to be processed later.  */
+   int this_prev = i;
+   int other_prev = 0;
+   for (j = chains[i - 1]; j; j = chains[j - 1])
+ {
+   if (tree_int_cst_equal (ranges[i - 1].high,
+   ranges[j - 1].high))
+ {
+   chains[this_prev - 1] = j;
+   this_prev = j;
+ }
+   else if (other_prev == 0)
+ {
+   buckets[b] = j;
+   other_prev = j;
+ }
+   else
+ {
+   chains[other_prev - 1] = j;
+   other_prev = j;
+ }
+ }
+   chains[this_prev - 1] = 0;
+   if (other_prev)
+ chains[other_prev - 1] = 0;
+   if (chains[i - 1] == 0)
+ {
+   if (other_prev)
+ b--;
+   continue;
+ }
+ }
for (j = chains[i - 1]; j; j = chains[j - 1])
  {
gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
@@ -3426,8 +3490,8 @@ optimize_ra

[PATCH] i386: Optimize pmovmskb on inverted vector to inversion of pmovmskb result [PR98461]

2020-12-30 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch adds combine splitters to optimize:
-   vpcmpeqd    %ymm1, %ymm1, %ymm1
-   vpandn      %ymm1, %ymm0, %ymm0
    vpmovmskb   %ymm0, %eax
+   notl        %eax
etc. (for vectors with fewer than 32 elements, with xorl instead of notl).
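
To see why this is correct (my sketch, not part of the patch): each
pmovmskb result bit is the sign bit of one vector element, so inverting
the vector complements exactly the low NUNITS bits of the mask:

  #include <immintrin.h>

  int
  check (__m128i x)
  {
    int a = _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (-1)));
    int b = _mm_movemask_epi8 (x) ^ 0xffff; /* 16 elements -> low 16 bits */
    return a == b; /* always 1 */
  }

With 32 elements all 32 bits of the int are element bits, so a plain notl
does the job.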

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-30  Jakub Jelinek  

PR target/98461
* config/i386/sse.md (<sse2_avx2>_pmovmskb): Add splitters
for pmovmskb of NOT vector.

* gcc.target/i386/sse2-pr98461.c: New test.
* gcc.target/i386/avx2-pr98461.c: New test.

--- gcc/config/i386/sse.md.jj   2020-12-28 12:27:32.318754687 +0100
+++ gcc/config/i386/sse.md  2020-12-29 14:15:45.898508216 +0100
@@ -16099,6 +16099,53 @@ (define_insn "*sse2_pmovmskb_ext"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
 
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+   (unspec:SI
+ [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
+  UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  [(set (match_dup 2)
+   (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0) (match_dup 3))]
+{
+  operands[2] = gen_reg_rtx (SImode);
+  if (GET_MODE_NUNITS (<MODE>mode) == 32)
+operands[3] = gen_rtx_NOT (SImode, operands[2]);
+  else
+{
+  operands[3]
+   = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
+   SImode);
+  operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
+}
+})
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+   (unspec:SI
+ [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
+  UNSPEC_MOVMSK))]
+  "TARGET_SSE2
+   && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
+   && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
+  [(set (match_dup 2)
+   (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0) (match_dup 3))]
+{
+  operands[2] = gen_reg_rtx (SImode);
+  operands[1] = gen_lowpart (<MODE>mode, operands[1]);
+  if (GET_MODE_NUNITS (<MODE>mode) == 32)
+operands[3] = gen_rtx_NOT (SImode, operands[2]);
+  else
+{
+  operands[3]
+   = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
+   SImode);
+  operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
+}
+})
+
 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
   [(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
--- gcc/testsuite/gcc.target/i386/sse2-pr98461.c.jj 2020-12-29 14:20:44.258146127 +0100
+++ gcc/testsuite/gcc.target/i386/sse2-pr98461.c    2020-12-29 14:23:11.462490600 +0100
@@ -0,0 +1,50 @@
+/* PR target/98461 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -masm=att" } */
+/* { dg-final { scan-assembler-times "\tpmovmskb\t" 6 } } */
+/* { dg-final { scan-assembler-times "\txorl\t" 6 } } */
+/* { dg-final { scan-assembler-not "\tpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tpxor" } } */
+/* { dg-final { scan-assembler-not "\tpandn" } } */
+
+#include 
+
+int
+f1 (__m128i x)
+{
+  return _mm_movemask_epi8 (x) ^ 65535;
+}
+
+int
+f2 (__m128i x)
+{
+  return _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255)));
+}
+
+int
+f3 (__v16qi x)
+{
+  x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1 };
+  return _mm_movemask_epi8 ((__m128i) x);
+}
+
+long
+f4 (__m128i x)
+{
+  return (unsigned) (_mm_movemask_epi8 (x) ^ 65535);
+}
+
+long
+f5 (__m128i x)
+{
+  return (unsigned) _mm_movemask_epi8 (_mm_andnot_si128 (x, _mm_set1_epi8 (255)));
+}
+
+long
+f6 (__v16qi x)
+{
+  x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1 };
+  return (unsigned) _mm_movemask_epi8 ((__m128i) x);
+}
--- gcc/testsuite/gcc.target/i386/avx2-pr98461.c.jj 2020-12-29 14:20:27.429335767 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr98461.c    2020-12-29 14:19:50.944746895 +0100
@@ -0,0 +1,54 @@
+/* PR target/98461 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -masm=att" } */
+/* { dg-final { scan-assembler-times "\tvpmovmskb\t" 6 } } */
+/* { dg-final { scan-assembler-times "\tnotl\t" 6 } } */
+/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tvpxor" } } */
+/* { dg-final { scan-assembler-not "\tvpandn" } } */
+
+#include 
+
+int
+f1 (__m256i x)
+{
+  return ~_mm256_movemask_epi8 (x);
+}
+
+int
+f2 (__m256i x)
+{
+  return _mm256_movemask_epi8 (_mm256_andnot_si256 (x, _mm256_set1_epi8 (255)));
+}
+
+int
+f3 (__v32qi x)
+{
+  x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1 };
+  return _mm256_movemask_epi8 ((__m256i) x);
+}
+
+long
+f4 (__m256i x)
+{
+  return (unsigned) ~_mm256_movemask_epi8 (x);
+}
+
+long
+f5 (__m256i x)
+{
+  return (unsigned) _mm256_movem

[PATCH] fold-const: Avoid (cast) ((cast2) x p+ y) folding for -fsanitize=alignment [PR98206]

2020-12-30 Thread Jakub Jelinek via Gcc-patches
Hi!

The following testcase is diagnosed by UBSan as invalid, even though it
is valid.
We have a base class Base2 at offset 1 with alignment 1 and do:
(const Derived &) ((const Base2 *) this + -1)
but the folder, before ubsan in the FE gets a chance to instrument it,
optimizes that into:
(const Derived &) this + -1
and so we end up requiring that this has the 8-byte alignment the Derived
class needs.

Fixed by avoiding such an optimization when -fsanitize=alignment is in
effect if it would affect the alignments (and guarded with !in_gimple_form
because we don't really care during GIMPLE, where pointer conversions are
useless anyway and so such folding isn't needed much).
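
To make the before/after concrete, here is my sketch of the two GENERIC
forms (not from the patch), assuming alignof (Derived) == 8 and
alignof (Base2) == 1:

  /* Before folding: ubsan instruments the conversion to the 1-aligned
     Base2 pointer, which always passes.  */
  (const Derived &) ((const Base2 *) this + -1)

  /* After folding: `this' (which points at the Base2 subobject, at
     offset 1) is converted to `const Derived *' first, so ubsan checks
     it against alignof (Derived) == 8 and wrongly fires.  */
  (const Derived &) ((const Derived *) this p+ -1)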

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-30  Jakub Jelinek  

PR c++/98206
* fold-const.c: Include asan.h.
(fold_unary_loc): Don't optimize (ptr_type) (((ptr_type2) x) p+ y)
into ((ptr_type) x) p+ y if sanitizing alignment in GENERIC and
ptr_type points to type with higher alignment than ptr_type2.

* g++.dg/ubsan/align-4.C: New test.

--- gcc/fold-const.c.jj 2020-12-21 10:14:39.409173804 +0100
+++ gcc/fold-const.c2020-12-29 15:40:34.283148245 +0100
@@ -82,6 +82,7 @@ along with GCC; see the file COPYING3.
 #include "attribs.h"
 #include "tree-vector-builder.h"
 #include "vec-perm-indices.h"
+#include "asan.h"
 
 /* Nonzero if we are folding constants inside an initializer; zero
otherwise.  */
@@ -9392,8 +9393,17 @@ fold_unary_loc (location_t loc, enum tre
  tree arg00 = TREE_OPERAND (arg0, 0);
  tree arg01 = TREE_OPERAND (arg0, 1);
 
- return fold_build_pointer_plus_loc
-  (loc, fold_convert_loc (loc, type, arg00), arg01);
+ /* If -fsanitize=alignment, avoid this optimization in GENERIC
+when the pointed type needs higher alignment than
+the p+ first operand's pointed type.  */
+ if (!in_gimple_form
+ && sanitize_flags_p (SANITIZE_ALIGNMENT)
+ && (min_align_of_type (TREE_TYPE (type))
+ > min_align_of_type (TREE_TYPE (TREE_TYPE (arg00)))))
+   return NULL_TREE;
+
+ arg00 = fold_convert_loc (loc, type, arg00);
+ return fold_build_pointer_plus_loc (loc, arg00, arg01);
}
 
   /* Convert (T1)(~(T2)X) into ~(T1)X if T1 and T2 are integral types
--- gcc/testsuite/g++.dg/ubsan/align-4.C.jj 2020-12-29 15:28:11.811501092 +0100
+++ gcc/testsuite/g++.dg/ubsan/align-4.C    2020-12-29 15:29:54.198348346 +0100
@@ -0,0 +1,31 @@
+// PR c++/98206
+// { dg-do run }
+// { dg-options "-fsanitize=alignment -std=c++11 -fno-sanitize-recover=alignment" }
+
+template <typename Derived>
+struct Base1
+{
+  char c1;
+};
+
+template <typename Derived>
+struct Base2
+{
+  char c2;
+  const Derived &get2 () const { return static_cast<const Derived &> (*this); }
+};
+
+struct X : public Base1<X>, public Base2<X>
+{
+  X (const char *d) : data{d} {}
+  const char *data;
+};
+
+int
+main ()
+{
+  X x = X{"cheesecake"};
+  const char *p = x.get2 ().data;
+  if (p[0] != 'c')
+__builtin_abort ();
+}

Jakub



Re: [PATCH] i386: Optimize pmovmskb on inverted vector to inversion of pmovmskb result [PR98461]

2020-12-30 Thread Uros Bizjak via Gcc-patches
On Wed, Dec 30, 2020 at 10:23 AM Jakub Jelinek  wrote:
>
> Hi!
>
> The following patch adds combine splitters to optimize:
> -   vpcmpeqd    %ymm1, %ymm1, %ymm1
> -   vpandn      %ymm1, %ymm0, %ymm0
>     vpmovmskb   %ymm0, %eax
> +   notl        %eax
> etc. (for vectors with fewer than 32 elements, with xorl instead of notl).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-12-30  Jakub Jelinek  
>
> PR target/98461
> * config/i386/sse.md (<sse2_avx2>_pmovmskb): Add splitters
> for pmovmskb of NOT vector.
>
> * gcc.target/i386/sse2-pr98461.c: New test.
> * gcc.target/i386/avx2-pr98461.c: New test.

OK.

Thanks,
Uros.


Re: [PATCH 1/3] arm: Add movmisalign patterns for MVE (PR target/97875)

2020-12-30 Thread Christophe Lyon via Gcc-patches
ping?

On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
 wrote:
>
> This patch adds new movmisalign<mode>_mve_load and store patterns for
> MVE to help vectorization. They are very similar to their Neon
> counterparts, but use different iterators and instructions.
>
> Indeed MVE supports fewer vector modes than Neon, so we use
> the MVE_VLD_ST iterator where Neon uses VQX.
>
> Since the supported modes are different from the ones valid for
> arithmetic operators, we introduce two new sets of macros:
>
> ARM_HAVE_NEON_<MODE>_LDST
>   true if Neon has vector load/store instructions for <MODE>
>
> ARM_HAVE_<MODE>_LDST
>   true if any vector extension has vector load/store instructions for <MODE>
>
> We move the movmisalign<mode> expander from neon.md to vec-common.md, and
> replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST.
>
> The patch also updates the mve-vneg.c test to scan for the better code
> generation when loading and storing the vectors involved: it checks
> that no 'orr' instruction is generated to cope with misalignment at
> runtime.
> This test was chosen among the other mve tests, but any other should
> be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good
> test because the compiler chooses to use memcpy.
>
> For instance we now generate:
> test_vneg_s32x4:
>     vldrw.32    q3, [r1]
>     vneg.s32    q3, q3
>     vstrw.32    q3, [r0]
>     bx      lr
>
> instead of:
> test_vneg_s32x4:
>     orr     r3, r1, r0
>     lsls    r3, r3, #28
>     bne     .L15
>     vldrw.32    q3, [r1]
>     vneg.s32    q3, q3
>     vstrw.32    q3, [r0]
>     bx      lr
> .L15:
>     push    {r4, r5}
>     ldrd    r2, r3, [r1, #8]
>     ldrd    r5, r4, [r1]
>     rsbs    r2, r2, #0
>     rsbs    r5, r5, #0
>     rsbs    r4, r4, #0
>     rsbs    r3, r3, #0
>     strd    r5, r4, [r0]
>     pop     {r4, r5}
>     strd    r2, r3, [r0, #8]
>     bx      lr
>
> 2020-12-15  Christophe Lyon  
>
> PR target/97875
> gcc/
> * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
> (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
> (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
> (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
> (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
> (ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
> (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
> (ARM_HAVE_NEON_V2DI_LDST): Likewise.
> (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
> (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
> (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): 
> Likewise.
> (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): 
> Likewise.
> (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): Likewise.
> (ARM_HAVE_V2DI_LDST): Likewise.
> * config/arm/mve.md (*movmisalign<mode>_mve_store): New pattern.
> (*movmisalign<mode>_mve_load): New pattern.
> * config/arm/neon.md (movmisalign<mode>): Move to ...
> * config/arm/vec-common.md: ... here.
>
> PR target/97875
> gcc/testsuite/
> * gcc.target/arm/simd/mve-vneg.c: Update test.
> ---
>  gcc/config/arm/arm.h | 40 
> 
>  gcc/config/arm/mve.md| 25 +
>  gcc/config/arm/neon.md   | 25 -
>  gcc/config/arm/vec-common.md | 24 +
>  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
>  5 files changed, 92 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index 4a63d33..d44e0c6 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
>  #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || 
> TARGET_HAVE_MVE_FLOAT)
>  #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || 
> TARGET_HAVE_MVE_FLOAT)
>
> +/* The conditions under which vector modes are supported by load/store
> +   instructions using Neon.  */
> +
> +#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
> +#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
> +#define ARM_HAVE_NEON_V4BF_LDST TARGET_BF16_SIMD
> +#define ARM_HAVE_NEON_V8BF_LDST TARGET_BF16_SIMD
> +#define ARM_HAVE_NEON_V2SF_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V4SF_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_DI_LDST TARGET_NEON
> +#define ARM_HAVE_NEON_V2DI_LDST TARGET_NEON
> +
> +/* The conditions under which vector modes are suppor

Re: [PATCH 2/3] arm: Auto-vectorization for MVE: vshl

2020-12-30 Thread Christophe Lyon via Gcc-patches
ping?

On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
 wrote:
>
> This patch enables MVE vshlq instructions for auto-vectorization.
>
> The existing mve_vshlq_n_<supf><mode> is kept, as it takes a single
> immediate as second operand, and is used by arm_mve.h.
>
> We move the vashl<mode>3 insn from neon.md to an expander in
> vec-common.md, and the mve_vshlq_<supf><mode> insn from mve.md to
> vec-common.md, adding the second alternative from neon.md.
>
> mve_vshlq_<supf><mode> will be used by a later patch enabling
> vectorization for vshr, as a unified version of
> ashl<mode>3_[signed|unsigned] from neon.md. Keeping the use of unspec
> VSHLQ makes it possible to generate both 's' and 'u' variants.
>
> It is not clear whether the neon_shift_[reg|imm] attribute is still
> suitable, since this insn is also used for MVE.
>
> I kept the mve_vshlq_<supf><mode> naming instead of renaming it to
> ashl<mode>3_<supf> as discussed because the reference in
> arm_mve_builtins.def automatically inserts the "mve_" prefix and I
> didn't want to make a special case for this.
>
> I haven't yet found why the v16qi and v8hi tests are not vectorized.
> With dest[i] = a[i] << b[i] and:
>   {
> int i;
> unsigned int i.24_1;
> unsigned int _2;
> int16_t * _3;
> short int _4;
> int _5;
> int16_t * _6;
> short int _7;
> int _8;
> int _9;
> int16_t * _10;
> short int _11;
> unsigned int ivtmp_42;
> unsigned int ivtmp_43;
>
>  [local count: 119292720]:
>
>  [local count: 954449105]:
> i.24_1 = (unsigned int) i_23;
> _2 = i.24_1 * 2;
> _3 = a_15(D) + _2;
> _4 = *_3;
> _5 = (int) _4;
> _6 = b_16(D) + _2;
> _7 = *_6;
> _8 = (int) _7;
> _9 = _5 << _8;
> _10 = dest_17(D) + _2;
> _11 = (short int) _9;
> *_10 = _11;
> i_19 = i_23 + 1;
> ivtmp_42 = ivtmp_43 - 1;
> if (ivtmp_42 != 0)
>   goto ; [87.50%]
> else
>   goto ; [12.50%]
>
>  [local count: 835156386]:
> goto ; [100.00%]
>
>  [local count: 119292720]:
> return;
>
>   }
> the vectorizer says:
> mve-vshl.c:37:96: note:   ==> examining statement: _5 = (int) _4;
> mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> internal
> mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> mve-vshl.c:37:96: missed:   conversion not supported by target.
> mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> internal
> mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> internal
> mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> mve-vshl.c:37:117: missed:   not vectorized: relevant stmt not supported: _5 
> = (int) _4;
> mve-vshl.c:37:96: missed:  bad operation or unsupported loop bound.
> mve-vshl.c:37:96: note:  * Analysis failed with vector mode V8HI
>
> 2020-12-03  Christophe Lyon  
>
> gcc/
> * config/arm/mve.md (mve_vshlq_<supf><mode>): Move to
> vec-common.md.
> * config/arm/neon.md (vashl<mode>3): Delete.
> * config/arm/vec-common.md (mve_vshlq_<supf><mode>): New.
> (vashl<mode>3): New expander.
>
> gcc/testsuite/
> * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.
> ---
>  gcc/config/arm/mve.md| 13 +-
>  gcc/config/arm/neon.md   | 19 -
>  gcc/config/arm/vec-common.md | 30 ++
>  gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 62 
> 
>  4 files changed, 93 insertions(+), 31 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 673a83c..8bdb451 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -822,18 +822,7 @@ (define_insn "mve_vcmpneq_<supf><mode>"
>
>  ;;
>  ;; [vshlq_s, vshlq_u])
> -;;
> -(define_insn "mve_vshlq_"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -   (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> -  (match_operand:MVE_2 2 "s_register_operand" "w")]
> -VSHLQ))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vshl.%#\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> +;; See vec-common.md
>
>  ;;
>  ;; [vabdq_s, vabdq_u])
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 50220be..ac9bf74 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -845,25 +845,6 @@ (define_insn "*smax3_neon"
>  ; generic vectorizer code.  It ends up creating a V2DI constructor with
>  ; SImode elements.
>
> -(define_insn "vashl3"
> -  [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
> -   (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
> - (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" 
> "w,Dm")))]
> -  "TARGET_NEON"
> -  {
> -switch (which_alternative)
> -  {
> -case 0: return "vshl.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
> -case 1: ret

Re: [PATCH 3/3] arm: Auto-vectorization for MVE: vshr

2020-12-30 Thread Christophe Lyon via Gcc-patches
ping?

On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
 wrote:
>
> This patch enables MVE vshr instructions for auto-vectorization.  New
> MVE patterns are introduced that take a vector of constants as second
> operand, all constants being equal.
>
> The existing mve_vshrq_n_<supf><mode> is kept, as it takes a single
> immediate as second operand, and is used by arm_mve.h.
>
> The vashr<mode>3 and vlshr<mode>3 expanders are moved from neon.md to
> vec-common.md, updated to rely on the normal expansion scheme to
> generate shifts by immediate.
>
> 2020-12-03  Christophe Lyon  
>
> gcc/
> * config/arm/mve.md (mve_vshrq_n_s<mode>_imm): New entry.
> (mve_vshrq_n_u<mode>_imm): Likewise.
> * config/arm/neon.md (vashr<mode>3, vlshr<mode>3): Move to ...
> * config/arm/vec-common.md: ... here.
>
> gcc/testsuite/
> * gcc.target/arm/simd/mve-vshr.c: Add tests for vshr.
> ---
>  gcc/config/arm/mve.md| 34 
>  gcc/config/arm/neon.md   | 34 
>  gcc/config/arm/vec-common.md | 38 +-
>  gcc/testsuite/gcc.target/arm/simd/mve-vshr.c | 59 
> 
>  4 files changed, 130 insertions(+), 35 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vshr.c
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 8bdb451..eea8b20 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -763,6 +763,7 @@ (define_insn "mve_vcreateq_"
>  ;;
>  ;; [vshrq_n_s, vshrq_n_u])
>  ;;
> +;; Version that takes an immediate as operand 2.
>  (define_insn "mve_vshrq_n_"
>[
> (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> @@ -775,6 +776,39 @@ (define_insn "mve_vshrq_n_"
>[(set_attr "type" "mve_move")
>  ])
>
> +;; Versions that take constant vectors as operand 2 (with all elements
> +;; equal).
> +(define_insn "mve_vshrq_n_s_imm"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> +   (ashiftrt:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> +   (match_operand:MVE_2 2 "imm_for_neon_rshift_operand" 
> "i")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +  {
> +return neon_output_shift_immediate ("vshr", 's', &operands[2],
> +   <MODE>mode,
> +   VALID_NEON_QREG_MODE (<MODE>mode),
> +   true);
> +  }
> +  [(set_attr "type" "mve_move")
> +])
> +(define_insn "mve_vshrq_n_u_imm"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> +   (lshiftrt:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> +   (match_operand:MVE_2 2 "imm_for_neon_rshift_operand" "i")))]
> +  ]
> +  "TARGET_HAVE_MVE"
> +  {
> +return neon_output_shift_immediate ("vshr", 'u', &operands[2],
> +   <MODE>mode,
> +   VALID_NEON_QREG_MODE (<MODE>mode),
> +   true);
> +  }
> +  [(set_attr "type" "mve_move")
> +])
> +
>  ;;
>  ;; [vcvtq_n_from_f_s, vcvtq_n_from_f_u])
>  ;;
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index ac9bf74..a0e8d7a 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -899,40 +899,6 @@ (define_insn "ashl3_unsigned"
>[(set_attr "type" "neon_shift_reg")]
>  )
>
> -(define_expand "vashr3"
> -  [(set (match_operand:VDQIW 0 "s_register_operand")
> -   (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
> -   (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
> -  "TARGET_NEON"
> -{
> -  if (s_register_operand (operands[2], mode))
> -{
> -  rtx neg = gen_reg_rtx (mode);
> -  emit_insn (gen_neon_neg2 (neg, operands[2]));
> -  emit_insn (gen_ashl3_signed (operands[0], operands[1], neg));
> -}
> -  else
> -emit_insn (gen_vashr3_imm (operands[0], operands[1], operands[2]));
> -  DONE;
> -})
> -
> -(define_expand "vlshr3"
> -  [(set (match_operand:VDQIW 0 "s_register_operand")
> -   (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
> -   (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
> -  "TARGET_NEON"
> -{
> -  if (s_register_operand (operands[2], mode))
> -{
> -  rtx neg = gen_reg_rtx (mode);
> -  emit_insn (gen_neon_neg2 (neg, operands[2]));
> -  emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg));
> -}
> -  else
> -emit_insn (gen_vlshr3_imm (operands[0], operands[1], operands[2]));
> -  DONE;
> -})
> -
>  ;; 64-bit shifts
>
>  ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 3a282f0..e126557 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -258,4 +258,40 @@ (define_expand "vashl<mode>3"
>  {
>    emit_insn (gen_mve_vshlq_u<mode> (operands[0], operands[1], operands[2]));
>DONE;
> -})
> \ 

[committed] d: Give the result of evaluated expressions a location

2020-12-30 Thread Iain Buclaw via Gcc-patches
Hi,

CST trees that were converted back to a D front-end AST node lost all
location information of the original expression.  With this patch, it
is now propagated on to the literal expression.
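
As a quick illustration (my example, not from the patch; it assumes the
gcc.builtins binding gdc provides):

  import gcc.builtins : __builtin_sqrtl;

  // The CTFE result of the builtin call comes back through
  // d_eval_constant_expression; the literal it builds now carries the
  // location of this call instead of a null Loc().
  enum r = __builtin_sqrtl(2.0L);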

Bootstrapped and regression tested on x86_64-linux-gnu, and committed to
mainline.

Regards,
Iain.

---
gcc/d/ChangeLog:

* d-tree.h (d_eval_constant_expression): Add location argument.
* d-builtins.cc (d_eval_constant_expression): Give generated constants
a proper file location.
* d-compiler.cc (Compiler::paintAsType): Pass expression location to
d_eval_constant_expression.
* d-frontend.cc (eval_builtin): Likewise.
---
 gcc/d/d-builtins.cc | 17 +
 gcc/d/d-compiler.cc |  4 ++--
 gcc/d/d-frontend.cc |  2 +-
 gcc/d/d-tree.h  |  2 +-
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/gcc/d/d-builtins.cc b/gcc/d/d-builtins.cc
index 72e2d3a7168..9c629c7028b 100644
--- a/gcc/d/d-builtins.cc
+++ b/gcc/d/d-builtins.cc
@@ -332,11 +332,12 @@ build_frontend_type (tree type)
 }
 
 /* Attempt to convert GCC evaluated CST to a D Frontend Expression.
+   LOC is the location in the source file where this CST is being evaluated.
This is used for getting the CTFE value out of a const-folded builtin,
returns NULL if it cannot convert CST.  */
 
 Expression *
-d_eval_constant_expression (tree cst)
+d_eval_constant_expression (const Loc &loc, tree cst)
 {
   STRIP_TYPE_NOPS (cst);
   Type *type = build_frontend_type (TREE_TYPE (cst));
@@ -353,23 +354,23 @@ d_eval_constant_expression (tree cst)
  real_value re = TREE_REAL_CST (TREE_REALPART (cst));
  real_value im = TREE_REAL_CST (TREE_IMAGPART (cst));
  complex_t value = complex_t (ldouble (re), ldouble (im));
- return ComplexExp::create (Loc (), value, type);
+ return ComplexExp::create (loc, value, type);
}
   else if (code == INTEGER_CST)
{
  dinteger_t value = TREE_INT_CST_LOW (cst);
- return IntegerExp::create (Loc (), value, type);
+ return IntegerExp::create (loc, value, type);
}
   else if (code == REAL_CST)
{
  real_value value = TREE_REAL_CST (cst);
- return RealExp::create (Loc (), ldouble (value), type);
+ return RealExp::create (loc, ldouble (value), type);
}
   else if (code == STRING_CST)
{
  const void *string = TREE_STRING_POINTER (cst);
  size_t len = TREE_STRING_LENGTH (cst);
- return StringExp::create (Loc (), CONST_CAST (void *, string), len);
+ return StringExp::create (loc, CONST_CAST (void *, string), len);
}
   else if (code == VECTOR_CST)
{
@@ -380,17 +381,17 @@ d_eval_constant_expression (tree cst)
  for (size_t i = 0; i < nunits; i++)
{
  Expression *elem
-   = d_eval_constant_expression (VECTOR_CST_ELT (cst, i));
+   = d_eval_constant_expression (loc, VECTOR_CST_ELT (cst, i));
  if (elem == NULL)
return NULL;
 
  (*elements)[i] = elem;
}
 
- Expression *e = ArrayLiteralExp::create (Loc (), elements);
+ Expression *e = ArrayLiteralExp::create (loc, elements);
  e->type = type->isTypeVector ()->basetype;
 
- return VectorExp::create (Loc (), e, type);
+ return VectorExp::create (loc, e, type);
}
 }
 
diff --git a/gcc/d/d-compiler.cc b/gcc/d/d-compiler.cc
index ffa7f78c82e..f737d8d9686 100644
--- a/gcc/d/d-compiler.cc
+++ b/gcc/d/d-compiler.cc
@@ -133,7 +133,7 @@ Compiler::paintAsType (UnionExp *, Expression *expr, Type 
*type)
 
   cst = native_interpret_expr (vectype, buffer, len);
 
-  Expression *e = d_eval_constant_expression (cst);
+  Expression *e = d_eval_constant_expression (expr->loc, cst);
   gcc_assert (e != NULL && e->op == TOKvector);
 
   return e->isVectorExp ()->e1;
@@ -143,7 +143,7 @@ Compiler::paintAsType (UnionExp *, Expression *expr, Type 
*type)
   /* Normal interpret cast.  */
   cst = native_interpret_expr (build_ctype (type), buffer, len);
 
-  Expression *e = d_eval_constant_expression (cst);
+  Expression *e = d_eval_constant_expression (expr->loc, cst);
   gcc_assert (e != NULL);
 
   return e;
diff --git a/gcc/d/d-frontend.cc b/gcc/d/d-frontend.cc
index da34e902275..91335307150 100644
--- a/gcc/d/d-frontend.cc
+++ b/gcc/d/d-frontend.cc
@@ -195,7 +195,7 @@ eval_builtin (Loc loc, FuncDeclaration *fd, Expressions 
*arguments)
   /* Builtin should be successfully evaluated.
  Will only return NULL if we can't convert it.  */
   if (TREE_CONSTANT (result) && TREE_CODE (result) != CALL_EXPR)
-e = d_eval_constant_expression (result);
+e = d_eval_constant_expression (loc, result);
 
   return e;
 }
diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h
index 31fe5181912..f5cf9d3f214 100644
--- a/gcc/d/d-tree.h
+++ b/gcc/d/d-tree.h
@@ -496,7 +496,7 @@ 

[committed] d: Simplify quoting characters in deps_add_target

2020-12-30 Thread Iain Buclaw via Gcc-patches
Hi,

The implementation of deps_add_target in d-lang.cc was based on what was
present in libcpp.  This patch synchronizes the escaping logic to match
the current version.
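
For example (my illustration, not from the patch), a target path
containing Make-significant characters such as

  a b#c$d:e.o

is now written to the dependency file as

  a\ b\#c$$d\:e.o

i.e. a backslash before spaces and tabs (with any immediately preceding
backslashes doubled), '\#' for '#', '$$' for '$', and the newly handled
'\:' for ':'.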

Bootstrapped and regression tested on x86_64-linux-gnu, committed to
mainline.

Regards
Iain

---
gcc/d/ChangeLog:

* d-lang.cc (deps_add_target): Handle quoting ':' character.
Reimplement backslash tracking.
---
 gcc/d/d-lang.cc | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/gcc/d/d-lang.cc b/gcc/d/d-lang.cc
index fb95716f918..f20d1f00cb3 100644
--- a/gcc/d/d-lang.cc
+++ b/gcc/d/d-lang.cc
@@ -114,26 +114,35 @@ deps_add_target (const char *target, bool quoted)
 }
 
   /* Quote characters in target which are significant to Make.  */
+  unsigned slashes = 0;
+
   for (const char *p = target; *p != '\0'; p++)
 {
   switch (*p)
{
+   case '\\':
+ slashes++;
+ break;
+
case ' ':
case '\t':
- for (const char *q = p - 1; target <= q && *q == '\\';  q--)
+ while (slashes--)
obstack_1grow (&buffer, '\\');
  obstack_1grow (&buffer, '\\');
- break;
+ goto Ldef;
 
case '$':
  obstack_1grow (&buffer, '$');
- break;
+ goto Ldef;
 
case '#':
+   case ':':
  obstack_1grow (&buffer, '\\');
- break;
+ goto Ldef;
 
default:
+   Ldef:
+ slashes = 0;
  break;
}
 
-- 
2.27.0



[PATCH] i386: Remove unnecessary clobbers from combine splitters.

2020-12-30 Thread Uros Bizjak via Gcc-patches
There is no need for combine splitters to emit insn patterns with
clobbers; the pass is smart enough to add clobbers to patterns as
necessary.
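
A hedged note on why this is safe (my understanding, not from the patch):
when combine re-recognizes the insns produced by a splitter, it can add
any clobber the matching define_insn requires.  So a split result like

  (set (match_dup 0)
       (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))

still matches an insn that carries (clobber (reg:CC FLAGS_REG)); the
clobber is supplied automatically during recognition.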

2020-12-30  Uroš Bizjak  

gcc/
* config/i386/i386.md: Remove unnecessary clobbers
from combine splitters.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to the mainline.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d7cd3df995c..ea1a0706dcb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -12693,12 +12693,10 @@
  [(not:SWI (match_operand:SWI 2 "register_operand"))
   (match_operand:SWI 3 "nonimmediate_operand")]))]
   ""
-  [(parallel
- [(set (reg:CCC FLAGS_REG)
-  (compare:CCC
-(plus:SWI (match_dup 2) (match_dup 3))
-(match_dup 2)))
-  (clobber (scratch:SWI))])
+  [(set (reg:CCC FLAGS_REG)
+   (compare:CCC
+ (plus:SWI (match_dup 2) (match_dup 3))
+ (match_dup 2)))
(set (match_dup 0)
(match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])
 
@@ -12709,12 +12707,10 @@
   (match_operand 3 "const_int_operand")]))]
   "TARGET_64BIT
&& IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
-  [(parallel
- [(set (reg:CCZ FLAGS_REG)
-  (compare:CCZ
-(lshiftrt:DI (match_dup 2) (match_dup 4))
-(const_int 0)))
-  (clobber (scratch:DI))])
+  [(set (reg:CCZ FLAGS_REG)
+   (compare:CCZ
+ (lshiftrt:DI (match_dup 2) (match_dup 4))
+ (const_int 0)))
(set (match_dup 0)
(match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
 {
@@ -12905,12 +12901,10 @@
  (label_ref (match_operand 0))
  (pc)))]
   ""
-  [(parallel
- [(set (reg:CCC FLAGS_REG)
-  (compare:CCC
-(plus:SWI (match_dup 2) (match_dup 3))
-(match_dup 2)))
-  (clobber (scratch:SWI))])
+  [(set (reg:CCC FLAGS_REG)
+   (compare:CCC
+ (plus:SWI (match_dup 2) (match_dup 3))
+ (match_dup 2)))
(set (pc)
(if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)])
  (label_ref (match_operand 0))
@@ -12926,12 +12920,10 @@
  (pc)))]
   "TARGET_64BIT
&& IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
-  [(parallel
- [(set (reg:CCZ FLAGS_REG)
-  (compare:CCZ
-(lshiftrt:DI (match_dup 2) (match_dup 4))
-(const_int 0)))
-  (clobber (scratch:DI))])
+  [(set (reg:CCZ FLAGS_REG)
+   (compare:CCZ
+ (lshiftrt:DI (match_dup 2) (match_dup 4))
+ (const_int 0)))
(set (pc)
(if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)])
  (label_ref (match_operand 0))
@@ -18581,9 +18573,8 @@
&& INTVAL (operands[2]) != -1
&& INTVAL (operands[2]) != 2147483647"
   [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
-   (parallel [(set (match_dup 0)
-  (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0
- (clobber (reg:CC FLAGS_REG))])]
+   (set (match_dup 0)
+   (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0]
   "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")
 
 (define_split
@@ -18594,9 +18585,8 @@
(const_int 0]
   ""
   [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
-   (parallel [(set (match_dup 0)
-  (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0
- (clobber (reg:CC FLAGS_REG))])])
+   (set (match_dup 0)
+   (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0])
 
 (define_split
   [(set (match_operand:SWI 0 "register_operand")
@@ -18605,13 +18595,10 @@
(match_operand 1 "int_nonimmediate_operand")
(const_int 0]
   ""
-  [(parallel [(set (reg:CCC FLAGS_REG)
-  (ne:CCC (match_dup 1) (const_int 0)))
- (clobber (match_dup 2))])
-   (parallel [(set (match_dup 0)
-  (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0
- (clobber (reg:CC FLAGS_REG))])]
-  "operands[2] = gen_rtx_SCRATCH (GET_MODE (operands[1]));")
+  [(set (reg:CCC FLAGS_REG)
+   (ne:CCC (match_dup 1) (const_int 0)))
+   (set (match_dup 0)
+   (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0])
 
 (define_insn "*movcc_noc"
   [(set (match_operand:SWI248 0 "register_operand" "=r,r")


[patch, shared coarrays, committed] Fix

2020-12-30 Thread Thomas Koenig via Gcc-patches

Hello world,

I just committed the attached patch to the branch as
https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=4726e39b0be3c0bc55e43d2d300f0d0b9529d883 .


It is sometimes astonishing, if you shake code
a bit, how many bugs come crawling out :-)

Best regards

Thomas


Make STAT and ERRMSG work on ALLOCATE, move error handling to library.

This makes STAT and ERRMSG work on ALLOCATE.  It also separates
the allocation of coarrays into two functions: one without error
checking, which is called by compiler-generated code, and one
with error checking for calls from user code.

In the course of looking at this, it was also noticed that
allocatable coarrays were not automatically deallocated;
this is now also fixed.  Also, allocatable coarrays with the
SAVE attribute are now actually saved.
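
A small usage illustration (my example, not from the patch):

  program p
    integer, allocatable :: a(:)[:]
    integer :: st
    character(len=128) :: msg
    allocate (a(1000)[*], stat=st, errmsg=msg)
    if (st /= 0) print *, trim(msg)
  end program p

The STAT/ERRMSG form goes through the new checking entry point in the
library, while a plain ALLOCATE from compiler-generated code uses the
unchecked one.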

gcc/fortran/ChangeLog:

* trans-array.c (gfc_allocate_shared_coarray): Remove extra
arguments, just build the call.
(allocate_shared_coarray_chk): New function.
(gfc_array_allocate): Adjust where to set the offset.
Error handling is done in the library for shared coarrays.
(gfc_trans_deferred_array): No early return for allocatable
shared coarrays.
* trans-array.h (gfc_array_allocate): Adjust prototype.
(gfc_allocate_shared_coarray): Likewise.
* trans-decl.c: Rename gfor_fndecl_cas_coarray_allocate to
gfor_fndecl_cas_coarray_alloc for
brevity.  Add gfor_fndecl_cas_coarray_alloc_chk.
(gfc_build_builtin_function_decls): Likewise.
(gfc_trans_shared_coarray): Adjust calling sequence for
gfc_allocate_shared_coarray.
(gfc_trans_deferred_vars): Correct handling of saved
allocatable shared coarrays.
* trans-stmt.c (gfc_trans_sync): Adjust whitespace.
(coarray_alloc_p): Remove.
(gfc_trans_allocate): Add shared_coarray variable to adjust
status and errmsg handling.
* trans.h: Rename gfor_fndecl_cas_coarray_allocate to
gfor_fndecl_cas_coarray_alloc for brevity.  Add
gfor_fndecl_cas_coarray_alloc_chk.

libgfortran/ChangeLog:

* caf_shared/coarraynative.c (test_for_cas_errors): Correct
handling of stat.
* caf_shared/libcoarraynative.h (STAT_ERRMSG_ENTRY_CHECK): Use
unlikely in condition.
(STAT_ERRMSG_ENTRY_CHECK_RET): Likewise.
* caf_shared/wrapper.c (cas_coarray_alloc): Adjust arguments.
Call cas_coarray_alloc_work.
(cas_coarray_alloc_chk): New function.
(cas_coarray_alloc_work): New function.

gcc/testsuite/ChangeLog:

* gfortran.dg/caf-shared/allocate_1.f90: Adjust number of calls to
sync_all.
* gfortran.dg/caf-shared/allocate_status_1.f90: New test.
* gfortran.dg/caf-shared/automatic_deallocate_1.f90: New test.
* gfortran.dg/caf-shared/save_allocatable_1.f90: New test.
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 58aaa5f781d..998ec959402 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5982,12 +5982,29 @@ gfc_cas_get_allocation_type (gfc_symbol * sym)
  return GFC_NCA_NORMAL_COARRAY;
 }
 
+/* Allocate a shared coarray from a constructor, without checking.  */
+
+void
+gfc_allocate_shared_coarray (stmtblock_t *b, tree decl, tree size, int corank,
+			 int alloc_type)
+{
+  gfc_add_expr_to_block (b,
+build_call_expr_loc (input_location, gfor_fndecl_cas_coarray_alloc,
+			 4, gfc_build_addr_expr (pvoid_type_node, decl),
+			 size, build_int_cst (integer_type_node, corank),
+			 build_int_cst (integer_type_node, alloc_type)));
+}
+
+/* Allocate a shared coarray from user space, with checking.  */
+
 void
-gfc_allocate_shared_coarray (stmtblock_t *b, tree decl, tree size, int rank,
-			 int corank, int alloc_type, tree status,
-			 tree errmsg, tree errlen, bool calc_offset)
+allocate_shared_coarray_chk (stmtblock_t *b, tree decl, tree size, int rank,
+ int corank, int alloc_type, tree status,
+ tree errmsg, tree errlen)
 {
   tree st, err, elen;
+  int i;
+  tree offset, stride, lbound, mult;
 
   if (status == NULL_TREE)
 st = null_pointer_node;
@@ -5996,28 +6013,25 @@ gfc_allocate_shared_coarray (stmtblock_t *b, tree decl, tree size, int rank,
 
   err = errmsg == NULL_TREE ? null_pointer_node : errmsg;
   elen = errlen == NULL_TREE ? build_int_cst (gfc_charlen_type_node, 0) : errlen;
+
   gfc_add_expr_to_block (b,
-	build_call_expr_loc (input_location, gfor_fndecl_cas_coarray_allocate,
-			 7, gfc_build_addr_expr (pvoid_type_node, decl),
-			 size, build_int_cst (integer_type_node, corank),
-			 build_int_cst (integer_type_node, alloc_type),
-			 st, err, elen));
-  if (calc_offset)
-{
-  int i;
-  tree offset, stride, lbound, mult;
-  offset = build_int_cst (gfc_array_index_type, 0);
-  for (i = 0; i < rank + corank; i++)
-	{
-	  stride = gfc_conv_array_stride (decl, i);
-	  lbound = gfc_conv_array_lbound (decl, i);
-	  mult = f

[PATCH, libstdc++] Add <source_location> to pch header.

2020-12-30 Thread Edward Smith-Rowland via Gcc-patches
I think we want to add <source_location> to the precompiled header...

Ok if it passes testing?

Ed Smith-Rowland




libgo patch committed: Update to Go1.16beta1 release

2020-12-30 Thread Ian Lance Taylor via Gcc-patches
I've committed a patch to update libgo to the Go 1.16beta1 release.

This patch does not include support for the new //go:embed directive
that will be available in Go 1.16.1 (https://golang.org/issue/41191).
Support for that requires compiler changes, which will come later.

As usual with these big updates, I have not included the complete
changes in this e-mail message, only changes that are gccgo-specific.

Testing this requires some changes to gotools.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian

* Makefile.am (check-runtime): Don't create check-runtime-dir.
(mostlyclean-local): Don't remove check-runtime-dir.
(check-go-tool, check-vet): Copy in go.mod and modules.txt.
(check-cgo-test, check-carchive-test): Add go.mod file.
* Makefile.in: Regenerate.
0c4cf80ab72765e5e1984f7d228822b945541915
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 1e461f06e95..fc5ef4498dd 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-d67579759e1769c08148304b2d378ec0b05637d6
+47bdc8bb36f16f9d1dec72df5dd6b45d7b0b0725
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gotools/Makefile.am b/gotools/Makefile.am
index 1b8702e98bb..3bbccb96c28 100644
--- a/gotools/Makefile.am
+++ b/gotools/Makefile.am
@@ -101,7 +101,7 @@ MOSTLYCLEANFILES = \
 
 mostlyclean-local:
if test -d check-go-dir; then chmod -R u+w check-go-dir; fi
-   rm -rf check-go-dir check-runtime-dir cgo-test-dir carchive-test-dir \
+   rm -rf check-go-dir cgo-test-dir carchive-test-dir \
check-vet-dir gocache-test
 
 if NATIVE
@@ -210,6 +210,11 @@ check-go-tool: go$(EXEEXT) $(noinst_PROGRAMS) check-head 
check-gccgo check-gcc
if test -d check-go-dir; then chmod -R u+w check-go-dir; fi
rm -rf check-go-dir cmd_go-testlog
$(MKDIR_P) check-go-dir/src/cmd/go
+   cp $(libgosrcdir)/go.mod check-go-dir/src/
+   cp $(cmdsrcdir)/go.mod check-go-dir/src/cmd/
+   $(MKDIR_P) check-go-dir/src/vendor check-go-dir/src/cmd/vendor
+   cp $(libgosrcdir)/vendor/modules.txt check-go-dir/src/vendor/
+   cp $(libgosrcdir)/cmd/vendor/modules.txt check-go-dir/src/cmd/vendor/
cp $(cmdsrcdir)/go/*.go check-go-dir/src/cmd/go/
cp -r $(cmdsrcdir)/go/internal check-go-dir/src/cmd/go/
cp $(libgodir)/zdefaultcc.go check-go-dir/src/cmd/go/internal/cfg/
@@ -234,8 +239,7 @@ check-go-tool: go$(EXEEXT) $(noinst_PROGRAMS) check-head 
check-gccgo check-gcc
 # but the runtime tests use the go tool heavily, so testing
 # here too will catch more problems.
 check-runtime: go$(EXEEXT) $(noinst_PROGRAMS) check-head check-gccgo check-gcc
-   rm -rf check-runtime-dir runtime-testlog
-   $(MKDIR_P) check-runtime-dir
+   rm -f runtime-testlog
@abs_libgodir=`cd $(libgodir) && $(PWD_COMMAND)`; \
LD_LIBRARY_PATH=`echo $${abs_libgodir}/.libs:$${LD_LIBRARY_PATH} | sed 
's,::*,:,g;s,^:*,,;s,:*$$,,'`; \
export LD_LIBRARY_PATH; \
@@ -256,6 +260,7 @@ check-runtime: go$(EXEEXT) $(noinst_PROGRAMS) check-head 
check-gccgo check-gcc
 check-cgo-test: go$(EXEEXT) $(noinst_PROGRAMS) check-head check-gccgo check-gcc
rm -rf cgo-test-dir cgo-testlog
$(MKDIR_P) cgo-test-dir/misc/cgo
+   echo 'module misc' > cgo-test-dir/misc/go.mod
cp -r $(libgomiscdir)/cgo/test cgo-test-dir/misc/cgo/
@abs_libgodir=`cd $(libgodir) && $(PWD_COMMAND)`; \
echo "cd cgo-test-dir/misc/cgo/test && $(ECHO_ENV) GOTRACEBACK=2 
$(abs_builddir)/go$(EXEEXT) test -test.short 
-test.timeout=$(GOTOOLS_TEST_TIMEOUT)s -test.v" > cgo-testlog
@@ -270,6 +275,7 @@ check-cgo-test: go$(EXEEXT) $(noinst_PROGRAMS) check-head 
check-gccgo check-gcc
 check-carchive-test: go$(EXEEXT) $(noinst_PROGRAMS) check-head check-gccgo 
check-gcc
rm -rf carchive-test-dir carchive-testlog
$(MKDIR_P) carchive-test-dir/misc/cgo
+   echo 'module misc' > carchive-test-dir/misc/go.mod
cp -r $(libgomiscdir)/cgo/testcarchive carchive-test-dir/misc/cgo/
@abs_libgodir=`cd $(libgodir) && $(PWD_COMMAND)`; \
echo "cd carchive-test-dir/misc/cgo/testcarchive && $(ECHO_ENV) 
LIBRARY_PATH=`echo $${abs_libgodir}/.libs` $(abs_builddir)/go$(EXEEXT) test 
-test.timeout=$(GOTOOLS_TEST_TIMEOUT)s -test.v" > carchive-testlog
@@ -283,6 +289,11 @@ check-carchive-test: go$(EXEEXT) $(noinst_PROGRAMS) 
check-head check-gccgo check
 check-vet: go$(EXEEXT) $(noinst_PROGRAMS) check-head check-gccgo check-gcc
rm -rf check-vet-dir cmd_vet-testlog
$(MKDIR_P) check-vet-dir/src/cmd/internal 
check-vet-dir/src/cmd/vendor/golang.org/x
+   cp $(libgosrcdir)/go.mod check-vet-dir/src/
+   cp $(cmdsrcdir)/go.mod check-vet-dir/src/cmd/
+   $(MKDIR_P) check-vet-dir/src/vendor check-vet-dir/src/cmd/vendor
+   cp $(libgosrcdir)/vendor/modules.txt check-vet-dir/src/vendor/
+   cp $(libgosrcdir)/cmd/vendor/modules.txt ch

[committed] d: Mangled Symbols now back reference types and identifiers

2020-12-30 Thread Iain Buclaw via Gcc-patches
Hi,

With this patch, symbols with `extern(D)` linkage are now mangled using
back references to types and identifiers if these occur more than once
in the mangled name as emitted before.  This reduces symbol length,
especially with chained expressions of templated functions with
Voldemort return types.

For example, the average symbol length of the 127000+ symbols created by
a libphobos unittest build is reduced by a factor of about 3, while the
longest symbol shrinks from 416133 to 1142 characters.
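
To make the scheme tangible, here is a standalone sketch (mine) of the
base-26 position encoding used by writeBackRef in the patch below:

  #include <string>

  // Upper-case letters for every digit but the last one, which is
  // lower case, so the decoder can tell where the reference ends.
  std::string encode_backref (size_t pos)
  {
    std::string s = "Q";
    size_t mul = 1;
    while (pos >= mul * 26)
      mul *= 26;
    for (; mul >= 26; mul /= 26)
      {
        unsigned char dig = (unsigned char) (pos / mul);
        s += (char) ('A' + dig);
        pos -= dig * mul;
      }
    s += (char) ('a' + pos);
    return s;  // encode_backref (0) == "Qa", encode_backref (27) == "QBb"
  }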

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, with
some further testing done on x86_64-apple-darwin.

Committed to mainline.

Regards
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 2bd4fc3fe.
---
 gcc/d/dmd/MERGE   |   2 +-
 gcc/d/dmd/dmangle.c   | 319 +++---
 gcc/d/dmd/dtemplate.c | 116 +--
 .../gdc.test/compilable/testInference.d   |   6 +-
 .../gdc.test/fail_compilation/fail12485.d |   9 +-
 .../gdc.test/runnable/imports/testmangle.d|  66 
 gcc/testsuite/gdc.test/runnable/link6574.d|  10 +-
 gcc/testsuite/gdc.test/runnable/mangle.d  |  82 +++--
 gcc/testsuite/gdc.test/runnable/template4.d   |  31 +-
 gcc/testsuite/gdc.test/runnable/template9.d   |  13 +-
 gcc/testsuite/gdc.test/runnable/testconst.d   |   3 +-
 11 files changed, 429 insertions(+), 228 deletions(-)
 create mode 100644 gcc/testsuite/gdc.test/runnable/imports/testmangle.d

diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 4fa62a9f56a..1f695b9d23c 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-45fa6cfd20827bb4252a616dc789514a1e673687
+2bd4fc3fed8b8cd9760e77c6b2a1905cd84d0e70
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/dmangle.c b/gcc/d/dmd/dmangle.c
index 8f869266871..f6eee52afbf 100644
--- a/gcc/d/dmd/dmangle.c
+++ b/gcc/d/dmd/dmangle.c
@@ -10,6 +10,7 @@
 
 #include "root/dsystem.h"
 #include "root/root.h"
+#include "root/aav.h"
 
 #include "mangle.h"
 #include "init.h"
@@ -133,13 +134,114 @@ void MODtoDecoBuffer(OutBuffer *buf, MOD mod)
 class Mangler : public Visitor
 {
 public:
+AA *types;
+AA *idents;
 OutBuffer *buf;
 
 Mangler(OutBuffer *buf)
 {
+this->types = NULL;
+this->idents = NULL;
 this->buf = buf;
 }
 
+/**
+* writes a back reference with the relative position encoded with base 26
+*  using upper case letters for all digits but the last digit which uses
+*  a lower case letter.
+* The decoder has to look up the referenced position to determine
+*  whether the back reference is an identifer (starts with a digit)
+*  or a type (starts with a letter).
+*
+* Params:
+*  pos   = relative position to encode
+*/
+void writeBackRef(size_t pos)
+{
+buf->writeByte('Q');
+const size_t base = 26;
+size_t mul = 1;
+while (pos >= mul * base)
+mul *= base;
+while (mul >= base)
+{
+unsigned char dig = (unsigned char)(pos / mul);
+buf->writeByte('A' + dig);
+pos -= dig * mul;
+mul /= base;
+}
+buf->writeByte('a' + (unsigned char)pos);
+}
+
+/**
+* Back references a non-basic type
+*
+* The encoded mangling is
+*   'Q' 
+*
+* Params:
+*  t = the type to encode via back referencing
+*
+* Returns:
+*  true if the type was found. A back reference has been encoded.
+*  false if the type was not found. The current position is saved for 
later back references.
+*/
+bool backrefType(Type *t)
+{
+if (!t->isTypeBasic())
+{
+size_t *p = (size_t *)dmd_aaGet(&types, (void *)t);
+if (*p)
+{
+writeBackRef(buf->length() - *p);
+return true;
+}
+*p = buf->length();
+}
+return false;
+}
+
+/**
+* Back references a single identifier
+*
+* The encoded mangling is
+*   'Q' 
+*
+* Params:
+*  id = the identifier to encode via back referencing
+*
+* Returns:
+*  true if the identifier was found. A back reference has been encoded.
+*  false if the identifier was not found. The current position is saved 
for later back references.
+*/
+bool backrefIdentifier(Identifier *id)
+{
+size_t *p = (size_t *)dmd_aaGet(&idents, (void *)id);
+if (*p)
+{
+writeBackRef(buf->length() - *p);
+return true;
+}
+*p = buf->length();
+return false;
+}
+
+void mangleSymbol(Dsymbol *s)
+{
+s->accept(this);
+}
+
+void mangleType(Type *t)
+{
+if (!backrefType(t))
+t->accept(this);
+}
+
+void mangleIdentifier(Id

Re: [PATCH, libstdc++] Add <source_location> to pch header.

2020-12-30 Thread Jonathan Wakely via Gcc-patches

On 30/12/20 17:54 -0500, Edward Smith-Rowland wrote:

I think we want to add <source_location> to the precompiled header...

Ok if it passes testing?


OK, thanks.




[PATCH] sccvn: Consider def BB in rpo_vn_valueize [PR98464]

2020-12-30 Thread Kewen.Lin via Gcc-patches
Hi,

As PR98464 shows, this patch makes rpo_vn_valueize
consider the definition basic block of the name, to keep it
in sync with what we do in function eliminate_stmt.

Bootstrapped/regtested on powerpc64le-linux-gnu P9.

Full SPEC2017 build/run passed on P9.

BR,
Kewen

gcc/ChangeLog:

PR tree-optimization/98464
* tree-ssa-sccvn.c (rpo_vn_valueize): Use the def basic block
to keep consistent with eliminate_stmt.

gcc/testsuite/ChangeLog:

PR tree-optimization/98464
* g++.dg/tree-ssa/pr98464.C: New test.

---
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr98464.C b/gcc/testsuite/g++.dg/tree-ssa/pr98464.C
new file mode 100644
index 000..1cbc4a8ef8d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr98464.C
@@ -0,0 +1,186 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O3 -fno-tree-dce" } */
+
+/* This case is to check there is no ICE.  */
+
+template  class _Op, typename... _Args>
+struct __detector {
+  using type = _Op<_Args...>;
+};
+
+template  class _Op, typename... _Args>
+using __detected_or = __detector<_Default, void, _Op, _Args...>;
+template  class _Op, typename... _Args>
+using __detected_or_t = typename __detected_or<_Default, _Op, _Args...>::type;
+template  struct __replace_first_arg;
+template  class _Template, typename _Up, typename _Tp,
+  typename... _Types>
+struct __replace_first_arg<_Template<_Tp, _Types...>, _Up> {
+  using type = _Template<_Up>;
+};
+
+template  class min_pointer;
+class MoveOnly;
+struct pointer_traits {
+  template 
+  using rebind = typename __replace_first_arg, _Up>::type;
+};
+
+template  class __normal_iterator {
+public:
+  __normal_iterator(_Iterator);
+};
+
+struct __allocator_traits_base {
+  template  using __pointer = typename _Tp::pointer;
+};
+
+template  struct allocator_traits : __allocator_traits_base {
+  typedef typename _Alloc::value_type value_type;
+  using pointer = __detected_or_t;
+  template  struct _Ptr {
+using type = pointer_traits::rebind<_Tp>;
+  };
+  using const_pointer = typename _Ptr::type;
+  using size_type = int;
+  static pointer allocate(_Alloc __a, size_type __n) {
+return __a.allocate(__n);
+  }
+};
+
+template 
+void _Destroy(_ForwardIterator __first, _ForwardIterator __last, _Allocator) {
+  for (; __first != __last; ++__first)
+;
+}
+
+template 
+_ForwardIterator __uninitialized_copy_a(_InputIterator, _ForwardIterator,
+_Allocator);
+
+template 
+_ForwardIterator __uninitialized_move_if_noexcept_a(_InputIterator __last,
+_ForwardIterator __result,
+_Allocator __alloc) {
+  return __uninitialized_copy_a(__last, __result, __alloc);
+}
+
+template 
+_ForwardIterator __uninitialized_default_n_a(_ForwardIterator __first,
+ _Size __n, _Allocator) {
+  for (; __n; --__n, ++__first)
+;
+  return __first;
+}
+
+template  struct _Vector_base {
+  typedef _Alloc _Tp_alloc_type;
+  typedef typename _Tp_alloc_type ::pointer pointer;
+  struct _Vector_impl_data {
+pointer _M_start;
+pointer _M_finish;
+pointer _M_end_of_storage;
+  };
+  struct _Vector_impl : _Tp_alloc_type, _Vector_impl_data {
+_Vector_impl(_Tp_alloc_type) {}
+  };
+  _Vector_base(long __n, _Alloc __a) : _M_impl(__a) {
+_M_impl._M_end_of_storage = _M_impl._M_start + __n;
+  }
+  _Vector_impl _M_impl;
+  pointer _M_allocate(long __n) {
+return __n ? allocator_traits<_Tp_alloc_type>::allocate(_M_impl, __n)
+   : pointer();
+  }
+};
+
+template  class vector : _Vector_base<_Alloc> {
+public:
+  typedef typename _Alloc::pointer pointer;
+  typedef __normal_iterator::const_pointer>
+  const_iterator;
+  typedef _Alloc allocator_type;
+  vector(long __n, allocator_type __a = allocator_type())
+  : _Vector_base<_Alloc>(__n, __a) {
+this->_M_impl._M_finish =
+__uninitialized_default_n_a(this->_M_impl._M_start, __n, 0);
+  }
+  ~vector() { _Destroy(this->_M_impl._M_start, this->_M_impl._M_finish, 0); }
+  const_iterator cbegin() { return this->_M_impl._M_start; }
+  typename _Alloc::value_type operator[](long) {
+return *this->_M_impl._M_start;
+  }
+  void insert(const_iterator, MoveOnly &&) {
+if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage)
+  ;
+else
+  _M_realloc_insert();
+  }
+  template  void _M_realloc_insert();
+};
+
+template 
+template 
+void vector<_Tp, _Alloc>::_M_realloc_insert() {
+  long __trans_tmp_6 = this->_M_impl._M_finish - this->_M_impl._M_start;
+  pointer __old_start = this->_M_impl._M_start;
+  pointer __old_finish = this->_M_impl._M_finish;
+  pointer __new_start(this->_M_allocate(__trans_tmp_6));
+  pointer __new_finish =
+  __uninitialized_move_if_noexcept_a(__old_finish, __new_finish, 0);
+  _Destroy(__old_start, __old_finish, 0);
+  this->_M_impl._M_start = __new_start;
+  this->_M_impl._M_fin