This pattern converts (trunc_div (convert a) (convert b)) to
(convert (trunc_div a b)) when:

1. type, a, and b all have unsigned integral types
2. a and b have the same type precision
3. type has type precision at least as large as a and b

This is useful as wider divisions are typically more expensive.

To illustrate the effects, consider the following code snippet:

unsigned long long f(unsigned int a, unsigned int b) {
        unsigned long long all = a;
        return all / b;
}

Without the pattern, g++ -std=c++20 -O2 generates the following
assembly:

f(unsigned int, unsigned int):
        mov eax, edi
        mov esi, esi
        xor edx, edx
        div rsi
        ret

With the pattern, it generates this:

f(unsigned int, unsigned int):
        mov eax, edi
        xor edx, edx
        div esi
        ret

This is identical to what clang++ -std=c++20 -O2 generates.

Signed-off-by: Zhao Wei Liew <zhaoweil...@gmail.com>

        PR tree-optimization/103855

gcc/ChangeLog:

        * match.pd: Add pattern for (type)X / (type)Y.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/divide-8.c: New test.
        * gcc.dg/tree-ssa/divide-9.c: New test.
---
 gcc/match.pd                             | 15 +++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c |  9 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/divide-9.c | 10 ++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-9.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 10f62284862..393b43756dd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -684,6 +684,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
   (convert (trunc_mod @0 @1))))
 
+/* (type)X / (type)Y -> (type)(X / Y)
+   when the resulting type is at least as precise as the original types
+   and when all the types are unsigned integral types. */
+(simplify
+ (trunc_div (convert @0) (convert @1))
+ (if (INTEGRAL_TYPE_P (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@1))
+      && TYPE_UNSIGNED (type)
+      && TYPE_UNSIGNED (TREE_TYPE (@0))
+      && TYPE_UNSIGNED (TREE_TYPE (@1))
+      && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1))
+      && TYPE_PRECISION (type) >= TYPE_PRECISION (TREE_TYPE (@0)))
+  (convert (trunc_div @0 @1))))
+
 /* x * (1 + y / x) - y -> x - y % x */
 (simplify
  (minus (mult:cs @0 (plus:s (trunc_div:s @1 @0) integer_onep)) @1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
new file mode 100644
index 00000000000..489604c4eb6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c
@@ -0,0 +1,9 @@
+/* PR tree-optimization/103855 */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+unsigned int f(unsigned int a, unsigned int b) {
+    unsigned long long all = a;
+    return all / b;
+}
+
+/* { dg-final { scan-tree-dump-not "\(unsigned long long int)" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
new file mode 100644
index 00000000000..3e75a49b509
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c
@@ -0,0 +1,10 @@
+/* PR tree-optimization/103855 */
+/* { dg-options "-O -fdump-tree-optimized" } */
+
+unsigned long long f(unsigned int a, unsigned int b) {
+    unsigned long long all = a;
+    return all / b;
+}
+
+/* { dg-final { scan-tree-dump-times "\\\(unsigned long long int\\\)" 1 "optimized" } } */
+
-- 
2.35.1

Reply via email to