https://github.com/Jason-VanBeusekom created 
https://github.com/llvm/llvm-project/pull/167011

This is a branch off of https://github.com/llvm/llvm-project/pull/159856, in 
which consists of the runtime portion of the changes required to support 
indirect function and virtual function calls on an `omp target device` when the 
virtual class / indirect function is mapped to the device from the host.

Key Changes

- Introduced a new flag OMP_DECLARE_TARGET_INDIRECT_VTABLE to mark VTable 
registrations
- Modified setupIndirectCallTable to support both VTable entries and indirect 
function pointers

Details:
The setupIndirectCallTable implementation was modified to support this 
registration type by retrieving the first address of the VTable and inferring 
the remaining data needed to build the indirect call table. Since the Vtables / 
Classes registered as indirect can be larger than 8 bytes, and the vtables may 
not be at the first address we either need to pass the size to 
__llvm_omp_indirect_call_lookup and have a check at each step of the binary 
search, or add multiple entries to the indirect table for each address 
registered. The latter was chosen.

This is PR (2/3) 
Register Vtable PR (1/3):  https://github.com/llvm/llvm-project/pull/159856,
Codegen / _llvm_omp_indirect_call_lookup PR (3/3): 
https://github.com/llvm/llvm-project/pull/159857

>From a00def3f20e166d4fb9328e6f0bc0742cd0afa31 Mon Sep 17 00:00:00 2001
From: jason-van-beusekom <[email protected]>
Date: Fri, 7 Nov 2025 10:39:38 -0600
Subject: [PATCH 1/2] [OpenMP][clang] Register Vtables on device for indirect
 calls - clang/llvm changes

- Register Vtable's on device during codegen
- Add support in OMPIRBuilder
- Add test cases for vtable codegen

Co-authored-by: Chi-Chun Chen <[email protected]>
Co-authored-by: Jeffery Sandoval <[email protected]>
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 129 ++++++++++++++++++
 clang/lib/CodeGen/CGOpenMPRuntime.h           |  20 +++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   4 +
 clang/lib/CodeGen/CGVTables.cpp               |   6 +
 clang/lib/CodeGen/CGVTables.h                 |   4 +
 clang/lib/CodeGen/CodeGenModule.h             |   3 +
 .../target_vtable_codegen_container.cpp       |  42 ++++++
 .../OpenMP/target_vtable_codegen_explicit.cpp |  48 +++++++
 ...rget_vtable_codegen_implicit_namespace.cpp |  43 ++++++
 ...rget_vtable_codegen_memberexpr_codegen.cpp |  56 ++++++++
 ...arget_vtable_codegen_mult_inherritence.cpp |  46 +++++++
 .../OpenMP/target_vtable_codegen_nested.cpp   |  82 +++++++++++
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   5 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  20 ++-
 14 files changed, 504 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/OpenMP/target_vtable_codegen_container.cpp
 create mode 100644 clang/test/OpenMP/target_vtable_codegen_explicit.cpp
 create mode 100644 
clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
 create mode 100644 
clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
 create mode 100644 
clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
 create mode 100644 clang/test/OpenMP/target_vtable_codegen_nested.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a503aaf613e30..249dc08b0d139 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1771,12 +1771,129 @@ void CGOpenMPRuntime::emitDeclareTargetFunction(const 
FunctionDecl *FD,
     Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
   }
 
+  // Register the indirect Vtable:
+  // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
+  // size field refers to the size of memory pointed to, not the size of
+  // the pointer symbol itself (which is implicitly the size of a pointer).
   OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
       Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
       llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
       llvm::GlobalValue::WeakODRLinkage);
 }
 
+void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+                                                 const VarDecl *VD) {
+  // TODO: add logic to avoid duplicate vtable registrations per
+  // translation unit; though for external linkage, this should no
+  // longer be an issue - or at least we can avoid the issue by
+  // checking for an existing offloading entry.  But, perhaps the
+  // better approach is to defer emission of the vtables and offload
+  // entries until later (by tracking a list of items that need to be
+  // emitted).
+
+  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+  // Generate a new externally visible global to point to the
+  // internally visible vtable. Doing this allows us to keep the
+  // visibility and linkage of the associated vtable unchanged while
+  // allowing the runtime to access its value.  The externally
+  // visible global var needs to be emitted with a unique mangled
+  // name that won't conflict with similarly named (internal)
+  // vtables in other translation units.
+
+  // Register vtable with source location of dynamic object in map
+  // clause.
+  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
+      CGM, OMPBuilder, VD->getCanonicalDecl()->getBeginLoc(),
+      VTable->getName());
+
+  llvm::GlobalVariable *Addr = VTable;
+  size_t PointerSize = CGM.getDataLayout().getPointerSize();
+  SmallString<128> AddrName;
+  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(AddrName, 
EntryInfo);
+  AddrName.append("addr");
+
+  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+    Addr = new llvm::GlobalVariable(
+        CGM.getModule(), VTable->getType(),
+        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
+        AddrName,
+        /*InsertBefore*/ nullptr, llvm::GlobalValue::NotThreadLocal,
+        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+  }
+  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
+      AddrName, VTable,
+      
CGM.getDataLayout().getTypeAllocSize(VTable->getInitializer()->getType()),
+      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
+      llvm::GlobalValue::WeakODRLinkage);
+}
+
+void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
+                                            CXXRecordDecl *CXXRecord,
+                                            const VarDecl *VD) {
+  // Register C++ VTable to OpenMP Offload Entry if it's a new
+  // CXXRecordDecl.
+  if (CXXRecord && CXXRecord->isDynamicClass() &&
+      !CGM.getOpenMPRuntime().VTableDeclMap.contains(CXXRecord)) {
+    CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(CXXRecord, VD);
+    CGM.EmitVTable(CXXRecord);
+    CodeGenVTables VTables = CGM.getVTables();
+    llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(CXXRecord);
+    if (VTablesAddr)
+      CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTablesAddr, VD);
+    // Emit VTable for all the fields containing dynamic CXXRecord
+    for (const FieldDecl *Field : CXXRecord->fields()) {
+      if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
+        emitAndRegisterVTable(CGM, RecordDecl, VD);
+    }
+    // Emit VTable for all dynamic parent class
+    for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
+      if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
+        emitAndRegisterVTable(CGM, BaseDecl, VD);
+    }
+  }
+};
+
+void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
+  // Register VTable by scanning through the map clause of OpenMP target 
region.
+  // Get CXXRecordDecl and VarDecl from Expr.
+  auto GetVTableDecl = [](const Expr *E) {
+    QualType VDTy = E->getType();
+    CXXRecordDecl *CXXRecord = nullptr;
+    if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
+      VDTy = RefType->getPointeeType();
+    if (VDTy->isPointerType())
+      CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
+    else
+      CXXRecord = VDTy->getAsCXXRecordDecl();
+
+    const VarDecl *VD = nullptr;
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E))
+      VD = cast<VarDecl>(DRE->getDecl());
+    else if (auto *MRE = dyn_cast<MemberExpr>(E)){
+      printf("here\n");
+      if (auto *BaseDRE = dyn_cast<DeclRefExpr>(MRE->getBase())){
+        printf("here 1\n");
+        if (auto *BaseVD = dyn_cast<VarDecl>(BaseDRE->getDecl())){
+          VD = BaseVD;
+          printf("here 2\n");
+        }
+      }
+      }
+    return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
+  };
+  // Collect VTable from OpenMP map clause.
+  for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
+    for (const auto *E : C->varlist()) {
+      auto DeclPair = GetVTableDecl(E);
+      // Ensure VD is not null
+      if (DeclPair.second)
+        emitAndRegisterVTable(CGM, DeclPair.first, DeclPair.second);
+    }
+  }
+}
+
 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                           QualType VarType,
                                                           StringRef Name) {
@@ -6249,6 +6366,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
         CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
     }
   }
+  registerVTable(D);
 }
 
 /// Checks if the expression is constant or does not have non-trivial function
@@ -9955,6 +10073,17 @@ void 
CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
   if (!S)
     return;
 
+  // Register vtable from device for target data and target directives.
+  // Add this block here since scanForTargetRegionsFunctions ignores
+  // target data by checking if S is a executable directive (target).
+  if (auto *E = dyn_cast<OMPExecutableDirective>(S);
+      E && isOpenMPTargetDataManagementDirective(E->getDirectiveKind())) {
+    // Don't need to check if it's device compile
+    // since scanForTargetRegionsFunctions currently only called
+    // in device compilation.
+    registerVTable(*E);
+  }
+
   // Codegen OMP target directives that offload compute to the device.
   bool RequiresDeviceCodegen =
       isa<OMPExecutableDirective>(S) &&
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index eb04eceee236c..7f8a81d4090e2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -605,6 +605,9 @@ class CGOpenMPRuntime {
                           LValue PosLVal, const OMPTaskDataTy::DependData 
&Data,
                           Address DependenciesArray);
 
+  /// Keep track of VTable Declarations so we don't register duplicate VTable.
+  llvm::SmallDenseMap<CXXRecordDecl *, const VarDecl *> VTableDeclMap;
+
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
@@ -1111,6 +1114,23 @@ class CGOpenMPRuntime {
   virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
                                          llvm::GlobalValue *GV);
 
+  /// Register VTable to OpenMP offload entry.
+  /// \param VTable VTable of the C++ class.
+  /// \param RD C++ class decl.
+  virtual void registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
+                                          const VarDecl *VD);
+  /// Emit code for registering vtable by scanning through map clause
+  /// in OpenMP target region.
+  /// \param D OpenMP target directive.
+  virtual void registerVTable(const OMPExecutableDirective &D);
+
+  /// Emit and register VTable for the C++ class in OpenMP offload entry.
+  /// \param CXXRecord C++ class decl.
+  /// \param VD Variable decl which holds VTable.
+  virtual void emitAndRegisterVTable(CodeGenModule &CGM,
+                                     CXXRecordDecl *CXXRecord,
+                                     const VarDecl *VD);
+
   /// Creates artificial threadprivate variable with name \p Name and type \p
   /// VarType.
   /// \param VarType Type of the artificial threadprivate variable.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d72cd8fbfd608..0b88f1dc5f0ea 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7617,6 +7617,10 @@ void CodeGenFunction::EmitOMPUseDeviceAddrClause(
 // Generate the instructions for '#pragma omp target data' directive.
 void CodeGenFunction::EmitOMPTargetDataDirective(
     const OMPTargetDataDirective &S) {
+  // Emit vtable only from host for target data directive.
+  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
+    CGM.getOpenMPRuntime().registerVTable(S);
+
   CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
                                        /*SeparateBeginEndCalls=*/true);
 
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index e14e883a55ac5..de4a67db313ea 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -38,6 +38,12 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(StringRef 
Name, llvm::Type *FnTy,
                                  /*DontDefer=*/true, /*IsThunk=*/true);
 }
 
+llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTable(const CXXRecordDecl *RD) 
{
+  llvm::GlobalVariable *VTable =
+      CGM.getCXXABI().getAddrOfVTable(RD, CharUnits());
+  return VTable;
+}
+
 static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk,
                                llvm::Function *ThunkFn, bool ForVTable,
                                GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h
index 5c45e355fb145..37458eee02e34 100644
--- a/clang/lib/CodeGen/CGVTables.h
+++ b/clang/lib/CodeGen/CGVTables.h
@@ -122,6 +122,10 @@ class CodeGenVTables {
                          llvm::GlobalVariable::LinkageTypes Linkage,
                          const CXXRecordDecl *RD);
 
+  /// GetAddrOfVTable - Get the address of the VTable for the given record
+  /// decl.
+  llvm::GlobalVariable *GetAddrOfVTable(const CXXRecordDecl *RD);
+
   /// EmitThunks - Emit the associated thunks for the given global decl.
   void EmitThunks(GlobalDecl GD);
 
diff --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index 3971b296b3f80..49dcba4b7618b 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -754,6 +754,9 @@ class CodeGenModule : public CodeGenTypeCache {
   // i32 @__isPlatformVersionAtLeast(i32, i32, i32, i32)
   llvm::FunctionCallee IsPlatformVersionAtLeastFn = nullptr;
 
+  //  Store indirect CallExprs that are within an omp target region
+  llvm::SmallPtrSet<const CallExpr *, 16> OMPTargetCalls;
+
   InstrProfStats &getPGOStats() { return PGOStats; }
   llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); 
}
 
diff --git a/clang/test/OpenMP/target_vtable_codegen_container.cpp 
b/clang/test/OpenMP/target_vtable_codegen_container.cpp
new file mode 100644
index 0000000000000..9fd4c6b736163
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_container.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52 -stdlib=libc++
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 -stdlib=libc++ 
| FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV7Derived
+// CHECK-DAG: @_ZTV4Base
+template <typename T>
+class Container {
+private:
+T value;
+public:
+Container() : value() {}
+Container(T val) : value(val) {}
+
+T getValue() const { return value; }
+
+void setValue(T val) { value = val; }
+};
+
+class Base {
+public:
+    virtual void foo() {}
+};
+class Derived : public Base {};
+
+class Test {
+public:
+    Container<Derived> v;
+};
+
+int main() {
+  Test test;
+  Derived d;
+  test.v.setValue(d);
+
+// Make sure we emit VTable for type indirectly (template specialized type)
+#pragma omp target map(test)
+  {
+      test.v.getValue().foo();
+  }
+  return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_explicit.cpp 
b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp
new file mode 100644
index 0000000000000..001ed8fdd9cd7
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_explicit.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// Make sure both host and device compilation emit vtable for Dervied
+// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
+// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
+// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
+// CHECK-DAG: $_ZTV7Derived = comdat any
+class Base {
+public:
+
+  virtual ~Base() = default;
+
+  virtual void BaseA(int a) { }
+};
+
+// CHECK: @_ZTV7Derived = linkonce_odr unnamed_addr constant { [6 x ptr] }
+class Derived : public Base {
+public:
+
+  ~Derived() override = default;
+
+  void BaseA(int a) override { x = a; }
+
+  virtual void DerivedB() { }
+private:
+  int x;
+};
+
+int main() {
+
+  Derived d;
+  Base& c = d;
+  int a = 50;
+  // Should emit vtable for Derived since d is added to map clause
+#pragma omp target data map (to: d, a)
+  {
+ #pragma omp target map(d)
+     {
+       c.BaseA(a);
+     }
+  }
+  return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp 
b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
new file mode 100644
index 0000000000000..364c55cd07985
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_implicit_namespace.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+namespace {
+
+// Make sure both host and device compilation emit vtable for Dervied
+// CHECK-DAG: @_ZTVN12_GLOBAL__N_17DerivedE
+// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD1Ev
+// CHECK-DAG: @_ZN12_GLOBAL__N_17DerivedD0Ev
+// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived5BaseAEi
+// CHECK-DAG: @_ZN12_GLOBAL__N_17Derived8DerivedBEv
+class Base {
+public:
+  virtual ~Base() = default;
+  virtual void BaseA(int a) { }
+};
+
+class Derived : public Base {
+public:
+  ~Derived() override = default;
+  void BaseA(int a) override { x = a; }
+  virtual void DerivedB() { }
+private:
+  int x;
+};
+
+};
+
+int main() {
+
+  Derived d;
+  Base& c = d;
+  int a = 50;
+#pragma omp target data map (to: d, a)
+  {
+ #pragma omp target
+     {
+       c.BaseA(a);
+     }
+  }
+  return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp 
b/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
new file mode 100644
index 0000000000000..0535ba1dec741
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_memberexpr_codegen.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+
+// CHECK-DAG: $_ZN4Base5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived5BaseAEi = comdat any
+// CHECK-DAG: $_ZN7Derived8DerivedBEv = comdat any
+// CHECK-DAG: $_ZN4BaseD1Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD0Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD1Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD0Ev = comdat any
+// CHECK-DAG: $_ZN4BaseD2Ev = comdat any
+// CHECK-DAG: $_ZN7DerivedD2Ev = comdat any
+// CHECK-DAG: $_ZTV4Base = comdat any
+// CHECK-DAG: $_ZTV7Derived = comdat any
+class Base {
+public:
+
+  virtual ~Base() = default;
+
+  virtual void BaseA(int a) { }
+};
+
+class Derived : public Base {
+public:
+
+  ~Derived() override = default;
+
+  void BaseA(int a) override { x = a; }
+
+  virtual void DerivedB() { }
+private:
+  int x;
+};
+
+struct VirtualContainer {
+  Base baseObj;
+  Derived derivedObj;
+  Base *basePtr;
+};
+
+int main() {
+  VirtualContainer container;
+  container.basePtr = &container.derivedObj;
+  int a = 50;
+#pragma omp target map(container.baseObj, container.derivedObj,                
\
+                           container.basePtr[ : 1])
+  {
+    container.baseObj.BaseA(a);
+    container.derivedObj.BaseA(a);
+    container.derivedObj.DerivedB();
+    container.basePtr->BaseA(a);
+  }
+  return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp 
b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
new file mode 100644
index 0000000000000..3069a4994a479
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_mult_inherritence.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV6Base_1
+// CHECK-DAG: @_ZTV7Derived
+// CHECK-DAG: @_ZTV6Base_2
+#pragma omp begin declare target
+
+class Base_1 {
+public:
+  virtual void foo() { }
+  virtual void bar() { }
+};
+
+class Base_2 {
+public:
+  virtual void foo() { }
+  virtual void bar() { }
+};
+
+class Derived : public Base_1, public Base_2 {
+public:
+  virtual void foo() override { }
+  virtual void bar() override { }
+};
+
+#pragma omp end declare target
+
+int main() {
+  Base_1 base;
+  Derived derived;
+
+  // Make sure we emit vtable for parent class (Base_1 and Base_2)
+#pragma omp target data map(derived)
+    {
+      Base_1 *p1 = &derived;
+
+#pragma omp target
+      {
+        p1->foo();
+        p1->bar();
+      }
+    }
+  return 0;
+}
diff --git a/clang/test/OpenMP/target_vtable_codegen_nested.cpp 
b/clang/test/OpenMP/target_vtable_codegen_nested.cpp
new file mode 100644
index 0000000000000..1ece83d60ac58
--- /dev/null
+++ b/clang/test/OpenMP/target_vtable_codegen_nested.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode  
-emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=52
+// RUN: %clang_cc1 -verify -fopenmp -Wno-openmp-mapping -x c++ -triple 
nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode 
 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path 
%t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=52 | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-DAG: @_ZTV3Car
+// CHECK-DAG: @_ZTV6Engine
+// CHECK-DAG: @_ZTV6Wheels
+// CHECK-DAG: @_ZTV7Vehicle
+// CHECK-DAG: @_ZTV5Brand
+class Engine {
+public:
+  Engine(const char *type) : type(type) {}
+  virtual ~Engine() {}
+
+  virtual void start() const { }
+
+protected:
+  const char *type;
+};
+
+class Wheels {
+public:
+  Wheels(int count) : count(count) {}
+  virtual ~Wheels() {}
+
+  virtual void roll() const { }
+
+protected:
+  int count;
+};
+
+class Vehicle {
+public:
+  Vehicle(int speed) : speed(speed) {}
+  virtual ~Vehicle() {}
+
+  virtual void move() const { }
+
+protected:
+  int speed;
+};
+
+class Brand {
+public:
+  Brand(const char *brandName) : brandName(brandName) {}
+  virtual ~Brand() {}
+
+  void showBrand() const { }
+
+protected:
+  const char *brandName;
+};
+
+class Car : public Vehicle, public Brand {
+public:
+  Car(const char *brand, int speed, const char *engineType, int wheelCount)
+      : Vehicle(speed), Brand(brand), engine(engineType), wheels(wheelCount) {}
+
+  void move() const override { }
+
+  void drive() const {
+    showBrand();
+    engine.start();
+    wheels.roll();
+    move();
+  }
+
+private:
+  Engine engine;
+  Wheels wheels;
+};
+
+int main() {
+  Car myActualCar("Ford", 100, "Hybrid", 4);
+
+  // Make sure we emit VTable for dynamic class as field
+#pragma omp target map(myActualCar)
+  {
+    myActualCar.drive();
+  }
+  return 0;
+}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f43ef932e965a..cc0d4c89f9b9f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -390,6 +390,8 @@ class OffloadEntriesInfoManager {
     OMPTargetGlobalVarEntryIndirect = 0x8,
     /// Mark the entry as a register requires global.
     OMPTargetGlobalRegisterRequires = 0x10,
+    /// Mark the entry as a declare target indirect vtable.
+    OMPTargetGlobalVarEntryIndirectVTable = 0x20,
   };
 
   /// Kind of device clause for declare target variables
@@ -2666,7 +2668,8 @@ class OpenMPIRBuilder {
   enum EmitMetadataErrorKind {
     EMIT_MD_TARGET_REGION_ERROR,
     EMIT_MD_DECLARE_TARGET_ERROR,
-    EMIT_MD_GLOBAL_VAR_LINK_ERROR
+    EMIT_MD_GLOBAL_VAR_LINK_ERROR,
+    EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
   };
 
   /// Callback function type
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 220eee3cb8b08..236cfab3f031c 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -10246,6 +10246,13 @@ void 
OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
           continue;
         }
         break;
+      case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect:
+      case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable:
+        if (!CE->getAddress()) {
+          ErrorFn(EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR, E.second);
+          continue;
+        }
+        break;
       default:
         break;
       }
@@ -10255,12 +10262,17 @@ void 
OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
       // entry. Indirect variables are handled separately on the device.
       if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
         if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
-            Flags != 
OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+            (Flags !=
+                 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
+             Flags != OffloadEntriesInfoManager::
+                          OMPTargetGlobalVarEntryIndirectVTable))
           continue;
 
       // Indirect globals need to use a special name that doesn't match the 
name
       // of the associated host global.
-      if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+      if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect 
||
+          Flags ==
+              OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
         createOffloadEntry(CE->getAddress(), CE->getAddress(), 
CE->getVarSize(),
                            Flags, CE->getLinkage(), CE->getVarName());
       else
@@ -10689,7 +10701,9 @@ void 
OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
       }
       return;
     }
-    if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
+    if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect ||
+        Flags ==
+            OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable)
       OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
                                                 Addr, VarSize, Flags, Linkage,
                                                 VarName.str());

>From 640bb630ab388c21b407fce526461bd379ff05eb Mon Sep 17 00:00:00 2001
From: jason-van-beusekom <[email protected]>
Date: Fri, 7 Nov 2025 10:40:53 -0600
Subject: [PATCH 2/2] [OpenMP][offload] Register Vtables runtime support for
 indirect calls

- Modify PluginInterface to register Vtables to indirect call table
---
 offload/include/omptarget.h                   |   2 +
 offload/libomptarget/PluginManager.cpp        |   7 +-
 offload/libomptarget/device.cpp               |  63 ++++++++---
 .../test/api/omp_indirect_call_table_manual.c | 107 ++++++++++++++++++
 4 files changed, 164 insertions(+), 15 deletions(-)
 create mode 100644 offload/test/api/omp_indirect_call_table_manual.c

diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..3317441f04eba 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
   OMP_DECLARE_TARGET_INDIRECT = 0x08,
   /// This is an entry corresponding to a requirement to be registered.
   OMP_REGISTER_REQUIRES = 0x10,
+  /// Mark the entry global as being an indirect vtable.
+  OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
 };
 
 enum TargetAllocTy : int32_t {
diff --git a/offload/libomptarget/PluginManager.cpp 
b/offload/libomptarget/PluginManager.cpp
index b57a2f815cba6..6fc330b92f0f5 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -434,7 +434,8 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
 
         llvm::offloading::EntryTy DeviceEntry = Entry;
         if (Entry.Size) {
-          if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
+          if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+              Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
                                      &DeviceEntry.Address) != OFFLOAD_SUCCESS)
             REPORT("Failed to load symbol %s\n", Entry.SymbolName);
 
@@ -443,7 +444,9 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
           // the device to point to the memory on the host.
           if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
               (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
-            if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
+            if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+                !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+                Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
                                         Entry.Address,
                                         Entry.Size) != OFFLOAD_SUCCESS)
               REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 71423ae0c94d9..d5436bde47ba5 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -112,21 +112,58 @@ setupIndirectCallTable(DeviceTy &Device, 
__tgt_device_image *Image,
   llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
   for (const auto &Entry : Entries) {
     if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP ||
-        Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
+        Entry.Size == 0 ||
+        (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+         !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE)))
       continue;
 
-    assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
-    auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
-
-    void *Ptr;
-    if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
-      return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
-                                       "failed to load %s", Entry.SymbolName);
-
-    HstPtr = Entry.Address;
-    if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
-      return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
-                                       "failed to load %s", Entry.SymbolName);
+    size_t PtrSize = sizeof(void *);
+    if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) {
+      // This is a VTable entry, the current entry is the first index of the
+      // VTable and Entry.Size is the total size of the VTable. Unlike the
+      // indirect function case below, the Global is not of size Entry.Size and
+      // is instead of size PtrSize (sizeof(void*)).
+      void *Vtable;
+      void *res;
+      if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", 
Entry.SymbolName);
+
+      // HstPtr = Entry.Address;
+      if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", 
Entry.SymbolName);
+      if (Device.synchronize(AsyncInfo))
+        return error::createOffloadError(
+            error::ErrorCode::INVALID_BINARY,
+            "failed to synchronize after retrieving %s", Entry.SymbolName);
+      // Calculate and emplace entire Vtable from first Vtable byte
+      for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) {
+        auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+        HstPtr = reinterpret_cast<void *>(
+            reinterpret_cast<uintptr_t>(Entry.Address) + i * PtrSize);
+        DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(res) +
+                                          i * PtrSize);
+      }
+    } else {
+      // Indirect function case: Entry.Size should equal PtrSize since we're
+      // dealing with a single function pointer (not a VTable)
+      assert(Entry.Size == PtrSize && "Global not a function pointer?");
+      auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+      void *Ptr;
+      if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", 
Entry.SymbolName);
+
+      HstPtr = Entry.Address;
+      if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
+        return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+                                         "failed to load %s", 
Entry.SymbolName);
+    }
+    if (Device.synchronize(AsyncInfo))
+      return error::createOffloadError(
+          error::ErrorCode::INVALID_BINARY,
+          "failed to synchronize after retrieving %s", Entry.SymbolName);
   }
 
   // If we do not have any indirect globals we exit early.
diff --git a/offload/test/api/omp_indirect_call_table_manual.c 
b/offload/test/api/omp_indirect_call_table_manual.c
new file mode 100644
index 0000000000000..e958d47d69dad
--- /dev/null
+++ b/offload/test/api/omp_indirect_call_table_manual.c
@@ -0,0 +1,107 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+typedef struct {
+  uint64_t Reserved;
+  uint16_t Version;
+  uint16_t Kind; // OpenMP==1
+  uint32_t Flags;
+  void *Address;
+  char *SymbolName;
+  uint64_t Size;
+  uint64_t Data;
+  void *AuxAddr;
+} __tgt_offload_entry;
+
+enum OpenMPOffloadingDeclareTargetFlags {
+  /// Mark the entry global as having a 'link' attribute.
+  OMP_DECLARE_TARGET_LINK = 0x01,
+  /// Mark the entry global as being an indirectly callable function.
+  OMP_DECLARE_TARGET_INDIRECT = 0x08,
+  /// This is an entry corresponding to a requirement to be registered.
+  OMP_REGISTER_REQUIRES = 0x10,
+  /// Mark the entry global as being an indirect vtable.
+  OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
+};
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Provided by the runtime.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr);
+#pragma omp declare target to(__llvm_omp_indirect_call_lookup)                 
\
+    device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// We assume unified addressing on the CPU target.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; }
+#pragma omp end declare variant
+
+#pragma omp begin declare target
+void foo(int *i) { *i += 1; }
+void bar(int *i) { *i += 10; }
+void baz(int *i) { *i += 100; }
+#pragma omp end declare target
+
+typedef void (*fptr_t)(int *i);
+
+// Dispatch Table - declare separately on host and device to avoid
+// registering with the library; this also allows us to use separate
+// names, which is convenient for debugging.  This dispatchTable is
+// intended to mimic what Clang emits for C++ vtables.
+fptr_t dispatchTable[] = {foo, bar, baz};
+#pragma omp begin declare target device_type(nohost)
+fptr_t GPUdispatchTable[] = {foo, bar, baz};
+fptr_t *GPUdispatchTablePtr = GPUdispatchTable;
+#pragma omp end declare target
+
+// Define "manual" OpenMP offload entries, where we emit Clang
+// offloading entry structure definitions in the appropriate ELF
+// section.  This allows us to  emulate the offloading entries that Clang would
+// normally emit for us
+
+__attribute__((weak, section("llvm_offload_entries"), aligned(8)))
+const __tgt_offload_entry __offloading_entry[] = {{
+    0ULL,                               // Reserved
+    1,                                  // Version
+    1,                                  // Kind
+    OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags
+    &dispatchTable,                     // Address
+    "GPUdispatchTablePtr",              // SymbolName
+    (size_t)(sizeof(dispatchTable)),    // Size
+    0ULL,                               // Data
+    NULL                                // AuxAddr
+}};
+
+// Mimic how Clang emits vtable pointers for C++ classes
+typedef struct {
+  fptr_t *dispatchPtr;
+} myClass;
+
+// ---------------------------------------------------------------------------
+int main() {
+  myClass obj_foo = {dispatchTable + 0};
+  myClass obj_bar = {dispatchTable + 1};
+  myClass obj_baz = {dispatchTable + 2};
+  int aaa = 0;
+
+#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz)
+  {
+    // Lookup
+    fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr);
+    fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr);
+    fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr);
+    foo_ptr[0](&aaa);
+    bar_ptr[0](&aaa);
+    baz_ptr[0](&aaa);
+  }
+
+  assert(aaa == 111);
+  // CHECK: PASS
+  printf("PASS\n");
+  return 0;
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to