[gcc r15-380] tree-optimization/114760 - check variants of >> and << in loop-niter

2024-05-11 Thread Di Zhao via Gcc-cvs
https://gcc.gnu.org/g:1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa

commit r15-380-g1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa
Author: dzhao.ampere 
Date:   Fri May 10 11:55:18 2024 +0800

tree-optimization/114760 - check variants of >> and << in loop-niter

When recognizing the bit-counting idiom, also accept the pattern "x * 2"
as "x << 1", and "x / 2" as "x >> 1" (given x is unsigned).

gcc/ChangeLog:
PR tree-optimization/114760
* tree-ssa-loop-niter.cc (is_lshift_by_1): New function
to check if STMT is equivalent to x << 1.
(is_rshift_by_1): New function to check if STMT is
equivalent to x >> 1.
(number_of_iterations_cltz): Enhance the identification
of logical shift by one.
(number_of_iterations_cltz_complement): Enhance the
identification of logical shift by one.

gcc/testsuite/ChangeLog:
PR tree-optimization/114760
* gcc.dg/tree-ssa/pr114760-1.c: New test.
* gcc.dg/tree-ssa/pr114760-2.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 +
 gcc/tree-ssa-loop-niter.cc | 56 ++--
 3 files changed, 131 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
new file mode 100644
index 000000000000..9f10ccc3b510
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
@@ -0,0 +1,69 @@
+/* PR tree-optimization/114760 */
+/* { dg-do compile } */
+/* { dg-require-effective-target clz } */
+/* { dg-require-effective-target ctz } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+unsigned
+ntz32_1 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+{
+  n = n - 1;
+  x = x * 2;
+}
+  return n;
+}
+
+unsigned
+ntz32_2 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+{
+  n = n - 1;
+  x = x + x;
+}
+  return n;
+}
+
+unsigned
+ntz32_3 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+{
+  n = n - 1;
+  x = x << 1;
+}
+  return n;
+}
+
+#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
+int
+nlz32_1 (unsigned int b) {
+int c = PREC;
+
+while (b != 0) {
+   b >>= 1;
+   c --;
+}
+
+return c;
+}
+
+int
+nlz32_2 (unsigned int b) {
+int c = PREC;
+
+while (b != 0) {
+   b /= 2;
+   c --;
+}
+
+return c;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
new file mode 100644
index 000000000000..e1b4c4b13382
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
@@ -0,0 +1,20 @@
+/* PR tree-optimization/114760 */
+/* { dg-do compile } */
+/* { dg-require-effective-target clz } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+// Check that for signed type, there's no CLZ.
+#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
+int
+no_nlz32 (int b) {
+int c = PREC;
+
+while (b != 0) {
+   b /= 2;
+   c --;
+}
+
+return c;
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 0fde07e626f5..92db9c72ee47 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero)
   return call;
 }
 
+/* Returns true if STMT is equivalent to x << 1.  */
+
+static bool
+is_lshift_by_1 (gassign *stmt)
+{
+  if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR
+  && integer_onep (gimple_assign_rhs2 (stmt)))
+return true;
+  if (gimple_assign_rhs_code (stmt) == MULT_EXPR
+  && tree_fits_shwi_p (gimple_assign_rhs2 (stmt))
+  && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
+return true;
+  return false;
+}
+
+/* Returns true if STMT is equivalent to x >> 1.  */
+
+static bool
+is_rshift_by_1 (gassign *stmt)
+{
+  if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt))))
+return false;
+  if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR
+  && integer_onep (gimple_assign_rhs2 (stmt)))
+return true;
+  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR
+  && tree_fits_shwi_p (gimple_assign_rhs2 (stmt))
+  && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
+return true;
+  return false;
+}
+
 /* See comment below for number_of_iterations_bitcount.
For c[lt]z, we have:
 
@@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit,
 
   /* Make sure iv_2_stmt is a logical shift by one stmt:
  iv_2 = iv_1 {<<|>>} 1  */
-  if (!is_gimple_assign (iv_2_stmt)
-  || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXP

[gcc r15-4956] testsuite: fix testcase pr110279-1.c

2024-11-05 Thread Di Zhao via Gcc-cvs
https://gcc.gnu.org/g:5c19ba52519be975d4464b063d3d5a2c700dd241

commit r15-4956-g5c19ba52519be975d4464b063d3d5a2c700dd241
Author: Di Zhao 
Date:   Tue Nov 5 12:28:54 2024 +0800

testsuite: fix testcase pr110279-1.c

The test case is for targets that support FMA. Previously
the "target" selector was missing from the dg-final command.

gcc/testsuite/ChangeLog:
PR tree-optimization/110279
* gcc.dg/pr110279-1.c: add target selector.

Diff:
---
 gcc/testsuite/gcc.dg/pr110279-1.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr110279-1.c b/gcc/testsuite/gcc.dg/pr110279-1.c
index a8c7257b28d3..c4f94ea5810c 100644
--- a/gcc/testsuite/gcc.dg/pr110279-1.c
+++ b/gcc/testsuite/gcc.dg/pr110279-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { scalar_all_fma || { i?86-*-* x86_64-*-* } } } } */
 /* { dg-options "-Ofast --param avoid-fma-max-bits=512 --param tree-reassoc-width=4 -fdump-tree-widening_mul-details" } */
 /* { dg-additional-options "-mcpu=generic" { target aarch64*-*-* } } */
 /* { dg-additional-options "-mfma" { target i?86-*-* x86_64-*-* } } */
@@ -64,4 +64,4 @@ foo3 (data_e a, data_e b, data_e c, data_e d)
   return result;
 }
 
-/* { dg-final { scan-tree-dump-times "Generated FMA" 3 "widening_mul"} } */
\ No newline at end of file
+/* { dg-final { scan-tree-dump-times "Generated FMA" 3 "widening_mul" } } */