[PATCH] testsuite/gcc.dg/strcmpopt_6.c: Add space in array for terminator.

2020-02-18 Thread Jon Beniston
Hi,

The patch adds space for the string terminator, as the function is called
with test_strcpy_strcmp_abc ("abcd")

ChangeLog

gcc/testsuite

2020-02-18  Jon Beniston 

* gcc.dg/strcmpopt_6.c: Add space in array for terminator.

diff --git a/gcc/testsuite/gcc.dg/strcmpopt_6.c
b/gcc/testsuite/gcc.dg/strcmpopt_6.c
index cb99294e5fa..06b7e3480db 100644
--- a/gcc/testsuite/gcc.dg/strcmpopt_6.c
+++ b/gcc/testsuite/gcc.dg/strcmpopt_6.c
@@ -33,7 +33,7 @@ test_strlen_lt6_strcmp_abcd (const char *s)
 __attribute__ ((noclone, noinline)) int
 test_strcpy_strcmp_abc (const char *s)
 {
-  char a[4];
+  char a[5];
   strcpy (a, s);
   return strcmp (a, "abc") == 0;
 }




RE: [RFC, vectorizer] Allow half_type for left shift in vect_operation_fits_smaller_type?

2017-11-20 Thread Jon Beniston
Hi Richard,

>Not digged long into this "interesting" function but this case is only
valid if type == 
>final type and if the result is not shifted back.
vect_recog_over_widening_pattern 
>works on a whole sequence of stmts after all, thus
>
>  b = (T_PROMOTED) a;
>  c = b << 2;
>  d = b >> 2;
>  e = (T_ORIG) b;
>
>would be miscompiled by your new case.

Here is the followup patch.
  
It supports half type left shift operation in
vect_recog_over_widening_patter
function.  As you suggested, the patch keeps half type lshift flag and gives
up on right shift operation or different new_type/use_type cases.
  
Two test cases are added, one should be recognized as pattern and the other
shouldn't.

Bootstrap OK and no gcc/g++ regression on x86_64/AArch64.
  
Does this look OK?

2017-11-20  Jon Beniston   gcc/
* tree-vect-patterns.c (vect_operation_fits_smaller_type): New
parameter.  Support half type lshift.
(vect_recog_over_widening_patter): Support half type lshift.

gcc/testsuite

* gcc.dg/vect/vect-over-widen-5.c: New test.
* gcc.dg/vect/vect-over-widen-6.c: New test.


diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c
b/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c
new file mode 100644
index 000..a3e5a44
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include 
+#include "tree-vect.h"
+
+#define N 256
+
+short a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+short b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+__attribute__ ((noinline))
+int foo (void)
+{
+  int i;
+
+  for (i=0; i
+#include "tree-vect.h"
+
+#define N 256
+
+short a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+short b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+__attribute__ ((noinline))
+int foo (void)
+{
+  int i;
+
+  for (i=0; i> 2);
+  a[i] = x;
+}
+}
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i=0; i> 2))
+  abort ();
+  }
+  
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "vect_recog_over_widening_pattern:
detected" "vect" } } */
+/* { dg-final { scan-tree-dump-not "pattern recognized" "vect" } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 1cd6e57..daadcfb 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1240,12 +1240,15 @@ vect_recog_widen_sum_pattern (vec *stmts,
tree *type_in,
  statements for STMT: the first one is a type promotion and the
second
  one is the operation itself.  We return the type promotion
statement
 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ
of
- the second pattern statement.  */
+ the second pattern statement.
+   HALF_TYPE_LSHIFT_P - Set to TRUE if STMT is left shift operation can be
+done in smaller type that has half precision of promoted type.  */
 
 static bool
 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
  tree *op0, tree *op1, gimple
**new_def_stmt,
- vec *stmts)
+ vec *stmts,
+ bool *half_type_lshift_p)
 {
   enum tree_code code;
   tree const_oprnd, oprnd;
@@ -1296,6 +1299,7 @@ vect_operation_fits_smaller_type (gimple *stmt, tree
def, tree *new_type,
 return false;
 }
 
+  unsigned HOST_WIDE_INT half_prec = TYPE_PRECISION (half_type);
   /* Can we perform the operation on a smaller type?  */
   switch (code)
 {
@@ -1305,11 +1309,11 @@ vect_operation_fits_smaller_type (gimple *stmt, tree
def, tree *new_type,
 if (!int_fits_type_p (const_oprnd, half_type))
   {
 /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
-if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
+if (TYPE_PRECISION (type) < (half_prec * 4))
   return false;
 
-interm_type = build_nonstandard_integer_type (
-TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED
(type));
+interm_type = build_nonstandard_integer_type (half_prec * 2,
+ TYPE_UNSIGNED
(type));
 if (!int_fits_type_p (const_oprnd, interm_type))
   return false;
   }
@@ -1317,34 +1321,43 @@ vect_operation_fits_smaller_type (gimple *stmt, tree
def, tree *new_type,
 break;
 
   case LSHIFT_EXPR:
-/* Try intermediate type - HALF_TYPE is not enough for sure.  */
-if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
-  return false;
+/* Try intermediate type - smaller than HALF_TYPE.  */
+ 

[RFC, vectorizer] Allow single element vector types for vector reduction operations

2017-08-27 Thread Jon Beniston
Hi,

I have an out-of-tree GCC port and it is struggling supporting
auto-vectorization on some dot product instructions.  For example, I have an
instruction that takes three operands which are all 32-bit general
registers. The second and third operands will be treated as V2HI then do dot
product, and then generate an SI result which is then added to the first
operand which is SI as well.

I do see there is dot product recognizer in tree-vect-patters.c, however, I
found the following testcase still can't be auto-vectorized on my port which
has implemented all necessary dot product standard patterns.  This testcase
can't be auto-vectorized on other targets that have similar V2HI dot product
instructions as well, for example ARC.

=== test.c ===
#define K 4
#define M 4
#define N 256
int in[N*K][M];
int out[K];
int coeff[N][M];
void
foo (void)
{
  int i, j, k;
  int sum;
  for (k = 0; k < K; k++)
{
  sum = 0;
  for (j = 0; j < M; j++)
for (i = 0; i < N; i++)
  sum += in[i+k][j] * coeff[i][j];
  out[k] = sum;
}
}
===
  The reason that auto-vectorizer doesn't work seems to be that GCC doesn't
support single-element vector types in get_vectype_for_scalar_type_and_size.
tree-vect-stmts.c: get_vectype_for_scalar_type_and_size
  ...
  if (nunits <= 1)
return NULL_TREE;

So, I am thinking this actually should be relaxed to support more cases.  At
least on vector reduction operations which normally will have scalar result
with wider types than the element type of input operands.

I have tried to make the auto-vectorizer work for my V2HI dot product case,
with the patch attached. Is this the correct approach?

Cheers,
Jon

gcc/
2017-08-27  Jon Beniston 

* tree-vectorizer.h (get_vectype_for_scalar_type): New optional
parameter declaration.
* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Add new
optional parameter "reduct_p".  Support single element vector types
if it is true.
(get_vectype_for_scalar_type): Add new parameter "reduct_p".
* tree-vect-patterns.c (vect_pattern_recog_1): Pass new parameter
"reduct_p".
* tree-vect-loop.c (vect_determine_vectorization_factor): Likewise.
(vect_model_reduction_cost): Likewise.
(get_initial_def_for_induction): Likewise.
(vect_create_epilog_for_reduction): Likewise.



fix-vec-reduct.patch
Description: Binary data


RE: [RFC, vectorizer] Allow single element vector types for vector reduction operations

2017-08-28 Thread Jon Beniston
Hi Richard,

>-  if (nunits < 1) /* Support V1SI.  */
>+  if (nunits < 1 || (nunits == 1 && !reduct_p))
> return NULL_TREE;
>
>doesn't seem to be against trunk which has
>
>  if (nunits <= 1)
>return NULL_TREE;
>
>so what happens if you just change that to
>
>  if (nunits < 1)
>return NULL_TREE;

Ah, that's what I first tried, and mistakenly sent the patch against that.

That did help the initial problem, but then I needed to add a lot of support
for the V1SI type in the backend (which just duplicated SI patterns) and
there are a few places where GCC seems to have sanity checks against vectors
that are only one element wide. A dot product producing a scalar result
seems natural. 

Also, as well as ARC benefitting from this, I think the TI c6x port would
too. That currently has a sdot_prodv2hi pattern that uses SI and V2HI types.

Cheers,
Jon




RE: [RFC, vectorizer] Allow single element vector types for vector reduction operations

2017-08-30 Thread Jon Beniston
Hi Richard,

>> Ah, that's what I first tried, and mistakenly sent the patch against
that.
>> 
>> That did help the initial problem, but then I needed to add a lot of 
>> support for the V1SI type in the backend (which just duplicated SI 
>> patterns) and there are a few places where GCC seems to have sanity 
>> checks against vectors that are only one element wide. A dot product 
>> producing a scalar result seems natural.
>
>Where do you need V1SI types?  The vectorizer should perform a SI extract 
>from V1SI here.  You need to elaborate a bit here.

After adding single element vector type support in the middle-end, at the
tree level, V1SI vector types would be the result type for the dot product
if the input operands were V2HI.

During RTL expansion, if V1SImode is accepted in the
TARGET_VECTOR_MODE_SUPPORTED_P hook, then V1SImode will be used after RTL
expansion and V1SImode patterns are needed, although they are actually
duplicates of SImode patterns.  I have now removed V1SImode from
TARGET_VECTOR_MODE_SUPPORTED_P, and they are turned into SI mode so there is
no need for the V1SI patterns now.

>> The vectorizer doesn't really support vector->scalar ops so V1SI 
>> feels more natural here.  That is, your patch looks a bit ugly -- 
>> there's nothing wrong with V1SI vector types.

I agree "there's nothing wrong with V1SI vector types" and the original
patch was trying to let vector reduction operation accept V1SI vector types.

However, if the only change was "<= 1" into "< 1" in
get_vectype_for_scalar_type_and_size, then it will cause a GCC assertion
failure in optab_for_tree_node for some shift operations. It seems all such
failures come from:

vect_pattern_recog_1:4185
  optab = optab_for_tree_code (code, type_in, optab_default);

The SUB_TYPE passed to optab_for_tree_code is "optab_default", however it is
possible that vect_pattern_recog_1 will generate gimple containing shift
operations, for example vect_recog_divmod_pattern will generate RSHIFT_EXPR,
while shift operation requires sub_type to be not optab_default?

  gcc/optabs-tree.h:

  /* An extra flag to control optab_for_tree_code's behavior.  This is
needed to
 distinguish between machines with a vector shift that takes a scalar
for the
 shift amount vs. machines that take a vector for the shift amount.  */

  enum optab_subtype
  {
optab_default,
optab_scalar,
optab_vector
  };

It looks to me like the other call sites of optab_for_tree_code which are
passing optab_default are mostly places where GCC is sure it is not a shift
operation.
Given TYPE_IN is returned from get_vectype_for_scalar_type, is it safe to
simply pass optab_vector in vect_pattern_recog_1?

I have verified the following middle-end fix also works for my port, it
passed also x86-64 bootstrap and there is no regression in gcc/g++
regression.

How is this new patch?

gcc/
2017-08-30  Jon Beniston  

* tree-vect-patterns.c (vect_pattern_recog_1): Pass optab_vector
when
calling optab_for_tree_code.
* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Allow
single
element vector types.

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index cfdb72c..4b39cb6 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4182,7 +4182,7 @@ vect_pattern_recog_1 (vect_recog_func *recog_func,
  code = CALL_EXPR;
}

-  optab = optab_for_tree_code (code, type_in, optab_default);
+  optab = optab_for_tree_code (code, type_in, optab_vector);
   vec_mode = TYPE_MODE (type_in);
   if (!optab
   || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 013fb1f..fc62efb 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9098,7 +9098,8 @@ get_vectype_for_scalar_type_and_size (tree
scalar_type, unsigned size)
   else
 simd_mode = mode_for_vector (inner_mode, size / nbytes);
   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
-  if (nunits <= 1)
+  /* NOTE: nunits == 1 is allowed to support single element vector types.
*/
+  if (nunits < 1)
 return NULL_TREE;

   vectype = build_vector_type (scalar_type, nunits);




RE: [RFC, vectorizer] Allow single element vector types for vector reduction operations

2017-08-30 Thread Jon Beniston
Hi Richard,

> I think the issue is that with TARGET_VECTOR_MODE_SUPPORTED_P false 
> for V1SI you'll get a SImode vector type and the
>
>  if (VECTOR_BOOLEAN_TYPE_P (type_in)
>  || VECTOR_MODE_P (TYPE_MODE (type_in)))
>
>check fails.  Try changing that to || VECTOR_TYPE_P (type_in).
>The else path should be hopefully dead ...
>
>> It looks to me like the other call sites of optab_for_tree_code which 
>> are passing optab_default are mostly places where GCC is sure it is 
>> not a shift operation.
>> Given TYPE_IN is returned from get_vectype_for_scalar_type, is it 
>> safe to simply pass optab_vector in vect_pattern_recog_1?
>
>I guess so.  Can you also try the above?

Changing VECTOR_MODE_P check into VECTOR_TYPE_P check also works for me, I
verified the following patch could vectorize my dot product case and there
is no regression on gcc tests on my port.

Meanwhile x86-64 bootstraps OK and no regression on gcc/g++ test.

 Does this look OK?


gcc/
2017-08-30  Jon Beniston  
Richard Biener  

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index cfdb72c..5ebeac2 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4150,7 +4150,7 @@ vect_pattern_recog_1 (vect_recog_func *recog_func,
   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  
   if (VECTOR_BOOLEAN_TYPE_P (type_in)
-  || VECTOR_MODE_P (TYPE_MODE (type_in)))
+  || VECTOR_TYPE_P (type_in))
 {
   /* No need to check target support (already checked by the pattern
  recognition function).  */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 013fb1f..fc62efb 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9098,7 +9098,8 @@ get_vectype_for_scalar_type_and_size (tree
scalar_type, unsigned size)
   else
 simd_mode = mode_for_vector (inner_mode, size / nbytes);
   nunits = GET_MODE_SIZE (simd_mode) / nbytes;
-  if (nunits <= 1)
+  /* NOTE: nunits == 1 is allowed to support single element vector types.
*/
+  if (nunits < 1)
 return NULL_TREE;
 
   vectype = build_vector_type (scalar_type, nunits);




RE: [RFC, vectorizer] Allow single element vector types for vector reduction operations

2017-09-07 Thread Jon Beniston
Hi Bernd,

>This seems to ring a bell. I think I submitted a patch for this here 
>- is this the same problem?
>https://gcc.gnu.org/ml/gcc-patches/2010-12/msg01724.html

Looks like it.

Cheers,
Jon




[RFC, vectorizer] Allow half_type for left shift in vect_operation_fits_smaller_type?

2017-09-21 Thread Jon Beniston
Hi,

The GCC vectorizer can't vectorize the following loop even though the target
supports 2-lane SIMD left shift.

short a[256], b[256];
foo ()
{
  int i;
  for (i=0; i<256; i++)
{ a[i] = b[i] << 4; }
}

The reason seems to be GCC is promoting the source from short to int, then
performing left shift on int type and finally a type demotion is done to
covert it back to short. Below is the related tree dump:

 _2 = (intD.1) _1;
 # RANGE [-524288, 524272] NONZERO 4294967280
 _3 = _2 << 4;
 # RANGE [-32768, 32767] NONZERO 65520
 _4 = (short intD.10) _3;
 # .MEM_8 = VDEF <.MEM_14>
 aD.1888[i_13] = _4;

I checked tree-vect-patterns.c and found there is a pattern recognizer
"vect_recog_over_widening_pattern" to recognize such sequences already.

But, in vect_operation_fits_smaller_type, it only recognizes the sequences
when the promoted type is 4 times wider than the original type. The reason
seems to be the original proposal at:

  https://gcc.gnu.org/ml/gcc-patches/2011-07/msg01472.html

is to handle the following sequences where three types are involved, and the
width, T_PROMOTED = 2 * T_INTER = 4 * T_ORIG.

  T_ORIG a;
  T_PROMOTED b, c;
  T_INTER d;

  b = (T_PROMOTED) a;
  c = b << 2;
  d = (T_INTER) c;

While we could also handle the following sequence where only two types are
involved, and T_PROMOTED = 2 * T_ORIG

  T_ORIG a;
  T_PROMOTED b, c, d;

  b = (T_PROMOTED) a;
  c = b << 2;
  d = (T_ORIG) c;

Performing the left shift on T_ORIG directly should be equal to performing
it on T_PROMOTED then converting back to T_ORIG.

x86-64/AArch64/PPC64 bootstrap OK (finished on gcc farms) and no regression
on check-gcc/g++.

gcc/
2017-09-21  Jon Beniston 

* tree-vect-patterns.c (vect_opertion_fits_smaller_type): Allow
half_type for LSHIFT_EXPR.

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index cdad261..0abf37c 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1318,7 +1318,12 @@ vect_operation_fits_smaller_type (gimple *stmt, tree
def, tree *new_type,
 break;
 
   case LSHIFT_EXPR:
-/* Try intermediate type - HALF_TYPE is not enough for sure.  */
+/* Try half_type.  */
+if (TYPE_PRECISION (type) == TYPE_PRECISION (half_type) * 2
+   && vect_supportable_shift (code, half_type))
+  break;
+
+/* Try intermediate type.  */
 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
   return false;