When inside a method we know that the this pointer points to
an object of at least the size of the method's base type. We
can use this to prove more references as not trapping, which
enables invariant motion and in turn vectorization, as for a
slightly modified version of the testcase in the PR.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
I'll note that the "trick" of relying on constraining the *this object
only works before inlining; for the origin of the testcase this
means that -flto still inhibits the invariant motion. I don't
have a good idea how to indicate object validity besides possibly
adding a "noop" *this read before each method call. So much
like a CLOBBER with some new kind assure-object-exists, but of
course not actually clobbering. Something like
{}(OE) ={v} *this;
Any ideas? Any other concerns with the patch?
Thanks,
Richard.
PR tree-optimization/121685
* tree-eh.cc (ref_outside_object_p): Split out from ...
(tree_could_trap_p): ... here. Assume the this pointer
of a method refers to an object of at least size of its
base type.
* g++.dg/vect/pr121685-1.cc: New testcase.
---
gcc/testsuite/g++.dg/vect/pr121685-1.cc | 20 +++++++++++
gcc/tree-eh.cc | 45 +++++++++++++++++++------
2 files changed, 55 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/vect/pr121685-1.cc
diff --git a/gcc/testsuite/g++.dg/vect/pr121685-1.cc
b/gcc/testsuite/g++.dg/vect/pr121685-1.cc
new file mode 100644
index 00000000000..faac9bad19d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr121685-1.cc
@@ -0,0 +1,20 @@
+// { dg-do compile }
+// { dg-additional-options "-mavx2" { target avx2 } }
+
+#include <vector>
+
+class Foo
+{
+public:
+ void fun (std::vector<int>& blacksq);
+ std::vector<int> m_mcowner;
+};
+
+void Foo::fun(std::vector<int>& blacksq)
+{
+ for (unsigned int i = 0; i < (unsigned int)blacksq.size(); i++)
+ if (blacksq[i])
+ m_mcowner[i]++;
+}
+
+// { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target avx2 } } }
diff --git a/gcc/tree-eh.cc b/gcc/tree-eh.cc
index 8cc81ebcf5e..5c62e6bcc38 100644
--- a/gcc/tree-eh.cc
+++ b/gcc/tree-eh.cc
@@ -2674,6 +2674,25 @@ access_in_bounds_of_type_p (tree type, poly_uint64 size,
poly_uint64 offset)
return true;
}
+/* Return whether an access at [off, off + refsz[ to an object spanning
+ [0, size[ accesses storage outside of the object. */
+
+static bool
+ref_outside_object_p (tree size, poly_offset_int off, tree refsz)
+{
+ if (size == NULL_TREE
+ || refsz == NULL_TREE
+ || !poly_int_tree_p (size)
+ || !poly_int_tree_p (refsz)
+ || maybe_le (wi::to_poly_offset (size), off)
+ || maybe_gt (off + wi::to_poly_offset (refsz),
+ wi::to_poly_offset (size)))
+ return true;
+ /* Now we are sure the whole base of the access is inside
+ the object. */
+ return false;
+}
+
/* Return true if EXPR can trap, as in dereferencing an invalid pointer
location or floating point arithmetic. C.f. the rtl version, may_trap_p.
This routine expects only GIMPLE lhs or rhs input. */
@@ -2771,17 +2790,23 @@ tree_could_trap_p (tree expr)
return maybe_le (TREE_STRING_LENGTH (base), off);
tree size = DECL_SIZE_UNIT (base);
tree refsz = TYPE_SIZE_UNIT (TREE_TYPE (expr));
- if (size == NULL_TREE
- || refsz == NULL_TREE
- || !poly_int_tree_p (size)
- || !poly_int_tree_p (refsz)
- || maybe_le (wi::to_poly_offset (size), off)
- || maybe_gt (off + wi::to_poly_offset (refsz),
- wi::to_poly_offset (size)))
+ return ref_outside_object_p (size, off, refsz);
+ }
+ if (cfun
+ && TREE_CODE (TREE_TYPE (cfun->decl)) == METHOD_TYPE
+ && ((TREE_CODE (TREE_OPERAND (expr, 0)) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (expr, 0))
+ && (SSA_NAME_VAR (TREE_OPERAND (expr, 0))
+ == DECL_ARGUMENTS (cfun->decl)))
+ || TREE_OPERAND (expr, 0) == DECL_ARGUMENTS (cfun->decl)))
+ {
+ poly_offset_int off = mem_ref_offset (expr);
+ if (maybe_lt (off, 0))
return true;
- /* Now we are sure the whole base of the access is inside
- the object. */
- return false;
+ tree size = TYPE_SIZE_UNIT
+ (TYPE_METHOD_BASETYPE (TREE_TYPE (cfun->decl)));
+ tree refsz = TYPE_SIZE_UNIT (TREE_TYPE (expr));
+ return ref_outside_object_p (size, off, refsz);
}
return true;
--
2.43.0