This patch optimizes out calls to __dynamic_cast when the type being
cast to (or its destructor) is final, instead emitting a simple
comparison of the types' vtable addresses.

This optimization is implemented and enabled by default in Clang
(https://reviews.llvm.org/D154658), where it can be turned off with
-fno-assume-unique-vtables because of the problems it can cause with
unmerged vtables when using shared libraries. Since the optimization
does not fit well under any existing GCC flag, I believe adding a new
-fassume-unique-vtables flag (disabled by default) is appropriate.

An alternative would be to skip the new flag and the vtable address
comparison, and simply replace the __dynamic_cast call with
type_info::operator== (whose behaviour depends on
__GXX_MERGED_TYPEINFO_NAMES). I believe that is not sufficient: it
performs additional dereferences compared to a simple vtable address
comparison, and so still leaves room for further optimization.

The RFC for this has been discussed previously
(https://gcc.gnu.org/pipermail/gcc-patches/2025-July/691295.html),
but I will be using a different e-mail going forward as I no longer
have access to the one I used there.

gcc/c-family/ChangeLog:

	* c.opt (fassume-unique-vtables): New optimization option.

gcc/cp/ChangeLog:

	* rtti.cc (build_dynamic_cast_1): Optimize dynamic_cast for
	final target types by comparing vtable addresses when
	-fassume-unique-vtables is enabled.

gcc/testsuite/ChangeLog:

	* g++.dg/rtti/dyncast9.C: New test.
	* g++.dg/rtti/dyncast10.C: New test.
---
 gcc/c-family/c.opt                    |  4 ++
 gcc/cp/rtti.cc                        | 55 +++++++++++++++++++++++++
 gcc/testsuite/g++.dg/rtti/dyncast10.C | 58 +++++++++++++++++++++++++++
 gcc/testsuite/g++.dg/rtti/dyncast9.C  | 58 +++++++++++++++++++++++++++
 4 files changed, 175 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/rtti/dyncast10.C
 create mode 100644 gcc/testsuite/g++.dg/rtti/dyncast9.C

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index a4cb755b0b9..0b0e6eb49af 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1747,6 +1747,10 @@ fassume-sane-operators-new-delete
 C++ ObjC++ Optimization Var(flag_assume_sane_operators_new_delete) Init(1)
 Assume C++ replaceable global operators new, new[], delete, delete[] don't read or write visible global state.

+fassume-unique-vtables
+C++ ObjC++ Optimization Var(flag_assume_unique_vtables) Init(0)
+Assume that, throughout the program's lifetime, each pair of class types representing the same class shares a common vtable.
+
 ; Define extra predefined macros for use in libgcc.
 fbuilding-libgcc
 C ObjC C++ ObjC++ Undocumented Var(flag_building_libgcc)
diff --git a/gcc/cp/rtti.cc b/gcc/cp/rtti.cc
index 1e787157b7b..63df9e66a91 100644
--- a/gcc/cp/rtti.cc
+++ b/gcc/cp/rtti.cc
@@ -764,6 +764,61 @@ build_dynamic_cast_1 (location_t loc, tree type, tree expr,
   if (tc == REFERENCE_TYPE)
     expr1 = cp_build_addr_expr (expr1, complain);

+
+
+    /* If type is final, don't call dynamic_cast.
+     * Instead just check vtable equivalence at runtime.
+     * TYPE_FINAL_P does not return true for non-final class with
+     * final destructor overriding virtual though,
+     * so look through virtual functions for final destructor */
+
+    bool can_inherit = true;
+    if(flag_assume_unique_vtables)
+      {
+        can_inherit = !TYPE_FINAL_P (target_type);
+        tree vchain;
+        for (vchain = BINFO_VIRTUALS (TYPE_BINFO (target_type));
+          vchain && can_inherit;
+          vchain = TREE_CHAIN (vchain))
+    {
+      if (!DECL_DESTRUCTOR_P (BV_FN (vchain)))
+        continue;
+      if (!DECL_FINAL_P (BV_FN (vchain)))
+        continue;
+      can_inherit = false;
+    }
+      }
+
+    if (!can_inherit)
+      {
+        tree binfo = lookup_base (target_type, static_type, ba_check,
NULL, complain);
+
+          if (!binfo || binfo == error_mark_node)
+              return error_mark_node;
+
+        /* Retrieve vtable declaration and address.
+         * The offset-to-top field is adjusted for here. */
+        tree trgt_vptr = build_vtbl_address (
+                BINFO_VTABLE (binfo) ? binfo : TYPE_BINFO (target_type));
+
+        tree src_obj = cp_build_fold_indirect_ref (expr);
+        tree src_vptr = build_vfield_ref (src_obj, static_type);
+
+        tree fail_result = tc == REFERENCE_TYPE ? throw_bad_cast () :
nullptr_node;
+        tree succ_result = build_base_path (MINUS_EXPR, expr1, binfo,
true, complain);
+
+        /* Check vtable equivalence by vptr address */
+        tree cond = build2 (NE_EXPR, boolean_type_node, trgt_vptr,
src_vptr);
+        tree result = build3 (COND_EXPR, type, cond, fail_result,
succ_result);
+
+        SET_EXPR_LOCATION (result, loc);
+
+        if (tc == REFERENCE_TYPE)
+          return result;
+
+        return build_if_nonnull (expr, result, complain);
+      }
+
   elems[0] = expr1;
   elems[1] = td3;
   elems[2] = td2;
diff --git a/gcc/testsuite/g++.dg/rtti/dyncast10.C b/gcc/testsuite/g++.dg/rtti/dyncast10.C
new file mode 100644
index 00000000000..e4c9b303cef
--- /dev/null
+++ b/gcc/testsuite/g++.dg/rtti/dyncast10.C
@@ -0,0 +1,69 @@
+// Verify that dynamic_cast to a final class is lowered to a vtable
+// address comparison instead of a call to __dynamic_cast when
+// -fassume-unique-vtables is in effect.
+// { dg-do run { target c++11 } }
+// { dg-options "-fassume-unique-vtables -fdump-tree-original" }
+// { dg-final { scan-tree-dump-not "__dynamic_cast" "original" } }
+
+extern "C" void abort ();
+
+struct A
+{
+  virtual ~A () {}
+};
+
+// B is declared final, which is the case the optimization keys on
+// via TYPE_FINAL_P.
+struct B final : public A
+{
+  ~B() {}
+  void p () { c++; }
+  int c = 0;
+};
+
+// Unrelated final sibling of B; casting a C object to B must fail.
+struct C final : public A
+{
+  ~C() {}
+  void f () { c++; }
+  int c = 0;
+};
+
+// Pointer form: a failed cast yields a null pointer.
+bool f (A* a) {
+  return dynamic_cast<B*>(a) != nullptr;
+}
+
+// Reference form: a failed cast throws, which is reported as false.
+bool g (A& a) {
+  try
+    {
+      dynamic_cast<B&>(a).p();
+      return true;
+    }
+  catch (...)
+    {
+      return false;
+    }
+}
+
+// Only a B object may be cast to B; A and C objects must be rejected.
+int
+main (void)
+{
+  A a;
+  B b;
+  C c;
+  if (f (&a))
+    abort ();
+  if (g (a))
+    abort ();
+  if (!f (&b))
+    abort ();
+  if (!g (b))
+    abort ();
+  if (f (&c))
+    abort ();
+  if (g (c))
+    abort ();
+}
diff --git a/gcc/testsuite/g++.dg/rtti/dyncast9.C b/gcc/testsuite/g++.dg/rtti/dyncast9.C
new file mode 100644
index 00000000000..4f3b2348d13
--- /dev/null
+++ b/gcc/testsuite/g++.dg/rtti/dyncast9.C
@@ -0,0 +1,69 @@
+// Verify that dynamic_cast to a class whose destructor is final is
+// lowered to a vtable address comparison instead of a call to
+// __dynamic_cast when -fassume-unique-vtables is in effect.
+// { dg-do run { target c++11 } }
+// { dg-options "-fassume-unique-vtables -fdump-tree-original" }
+// { dg-final { scan-tree-dump-not "__dynamic_cast" "original" } }
+
+extern "C" void abort ();
+
+struct A
+{
+  virtual ~A () {}
+};
+
+// B is not declared final, but its virtual destructor is; the
+// optimization treats that the same way as a final class.
+struct B : public A
+{
+  ~B() final {}
+  void p () { c++; }
+  int c = 0;
+};
+
+// Unrelated sibling of B; casting a C object to B must fail.
+struct C : public A
+{
+  ~C() final {}
+  void f () { c++; }
+  int c = 0;
+};
+
+// Pointer form: a failed cast yields a null pointer.
+bool f (A* a) {
+  return dynamic_cast<B*>(a) != nullptr;
+}
+
+// Reference form: a failed cast throws, which is reported as false.
+bool g (A& a) {
+  try
+    {
+      dynamic_cast<B&>(a).p();
+      return true;
+    }
+  catch (...)
+    {
+      return false;
+    }
+}
+
+// Only a B object may be cast to B; A and C objects must be rejected.
+int
+main (void)
+{
+  A a;
+  B b;
+  C c;
+  if (f (&a))
+    abort ();
+  if (g (a))
+    abort ();
+  if (!f (&b))
+    abort ();
+  if (!g (b))
+    abort ();
+  if (f (&c))
+    abort ();
+  if (g (c))
+    abort ();
+}
-- 
2.52.0

Reply via email to