This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e40efd8648 GH-48159 [C++][Gandiva] Projector make is significantly 
slower after move to OrcJIT (#49063)
e40efd8648 is described below

commit e40efd86489c673d5b0203ec943c704d92930f2f
Author: Logan Riggs <[email protected]>
AuthorDate: Fri Jan 30 00:34:22 2026 -0800

    GH-48159 [C++][Gandiva] Projector make is significantly slower after move 
to OrcJIT (#49063)
    
    ### Rationale for this change
    Reduces LLVM TargetMachine object creation from 3 to 1. This object is 
expensive to create and the extra copies weren't needed.
    
    ### What changes are included in this PR?
    Refactor the Engine class to only create one target machine and pass that 
to the necessary functions.
    
    Before the change (3 TargetMachines created):
    
    First TargetMachine: In Engine::Make(), MakeTargetMachineBuilder() is 
called, then BuildJIT() is called. Inside LLJITBuilder::create(), when 
prepareForConstruction() runs, if no DataLayout was set, it calls 
JTMB->getDefaultDataLayoutForTarget() which creates a temporary TargetMachine 
just to get the DataLayout.
    
    Second TargetMachine: Inside BuildJIT(), when setCompileFunctionCreator is 
used with the lambda, that lambda calls JTMB.createTargetMachine() to create a 
TargetMachine for the TMOwningSimpleCompiler.
    
    Third TargetMachine: Back in Engine::Make(), after BuildJIT() returns, 
there's an explicit call to jtmb.createTargetMachine() to create 
target_machine_ for the Engine.
    
    After the change (1 TargetMachine created):
    
    The key changes are:
    
    Create TargetMachine first: The code now creates the TargetMachine 
explicitly at the start of the Engine in Engine::Make. That machine is passed 
to BuildJIT. In BuildJiIT that machine's DataLayout is sent to LLJITBuilder 
which prevents prepareForConstruction() from calling 
getDefaultDataLayoutForTarget() (which would create a temporary TargetMachine).
    
    Use SimpleCompiler instead of TMOwningSimpleCompiler:
    SimpleCompiler takes a reference to an existing TargetMachine rather than 
owning one, so no new TargetMachine is created.
    A shared_ptr is used to ensure that TargetMachine stays around for the 
lifetime of the LLJIT instance.
    
    ### Are these changes tested?
    Yes, unit and integration.
    
    ### Are there any user-facing changes?
    No.
    
    * GitHub Issue: #48159
    
    Lead-authored-by: [email protected] <[email protected]>
    Co-authored-by: Logan Riggs <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/engine.cc | 32 +++++++++++++++++++++-----------
 cpp/src/gandiva/engine.h  |  6 ++++--
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index a718a80060..496722b1ea 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -219,7 +219,10 @@ Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& 
jit_builder) {
 
 Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
     llvm::orc::JITTargetMachineBuilder jtmb,
-    std::optional<std::reference_wrapper<GandivaObjectCache>>& object_cache) {
+    std::shared_ptr<llvm::TargetMachine> target_machine,
+    std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
+  auto data_layout = target_machine->createDataLayout();
+
   llvm::orc::LLJITBuilder jit_builder;
 
 #ifdef JIT_LINK_SUPPORTED
@@ -227,20 +230,20 @@ Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
 #endif
 
   jit_builder.setJITTargetMachineBuilder(std::move(jtmb));
+  jit_builder.setDataLayout(std::make_optional(data_layout));
+
   if (object_cache.has_value()) {
     jit_builder.setCompileFunctionCreator(
-        [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
+        [tm = std::move(target_machine),
+         &object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
             -> 
llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
-          auto target_machine = JTMB.createTargetMachine();
-          if (!target_machine) {
-            return target_machine.takeError();
-          }
           // after compilation, the object code will be stored into the given 
object
           // cache
-          return std::make_unique<llvm::orc::TMOwningSimpleCompiler>(
-              std::move(*target_machine), &object_cache.value().get());
+          return std::make_unique<llvm::orc::SimpleCompiler>(*tm,
+                                                             
&object_cache.value().get());
         });
   }
+
   auto maybe_jit = jit_builder.create();
   ARROW_ASSIGN_OR_RAISE(auto jit,
                         AsArrowResult(maybe_jit, "Could not create LLJIT 
instance: "));
@@ -317,7 +320,7 @@ void Engine::InitOnce() {
 
 Engine::Engine(const std::shared_ptr<Configuration>& conf,
                std::unique_ptr<llvm::orc::LLJIT> lljit,
-               std::unique_ptr<llvm::TargetMachine> target_machine, bool 
cached)
+               std::shared_ptr<llvm::TargetMachine> target_machine, bool 
cached)
     : context_(std::make_unique<llvm::LLVMContext>()),
       lljit_(std::move(lljit)),
       ir_builder_(std::make_unique<llvm::IRBuilder<>>(*context_)),
@@ -367,14 +370,21 @@ Result<std::unique_ptr<Engine>> Engine::Make(
     std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
   std::call_once(llvm_init_once_flag, InitOnce);
 
+  // Create the target machine
   ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf));
-  ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache));
   auto maybe_tm = jtmb.createTargetMachine();
   ARROW_ASSIGN_OR_RAISE(auto target_machine,
                         AsArrowResult(maybe_tm, "Could not create target 
machine: "));
 
+  auto shared_target_machine =
+      std::shared_ptr<llvm::TargetMachine>(std::move(target_machine));
+
+  // Build the LLJIT instance
+  ARROW_ASSIGN_OR_RAISE(auto jit,
+                        BuildJIT(std::move(jtmb), shared_target_machine, 
object_cache));
+
   std::unique_ptr<Engine> engine{
-      new Engine(conf, std::move(jit), std::move(target_machine), cached)};
+      new Engine(conf, std::move(jit), std::move(shared_target_machine), 
cached)};
 
   ARROW_RETURN_NOT_OK(engine->Init());
   return engine;
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 3c8914a7b4..20165787cb 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -96,7 +96,7 @@ class GANDIVA_EXPORT Engine {
  private:
   Engine(const std::shared_ptr<Configuration>& conf,
          std::unique_ptr<llvm::orc::LLJIT> lljit,
-         std::unique_ptr<llvm::TargetMachine> target_machine, bool cached);
+         std::shared_ptr<llvm::TargetMachine> target_machine, bool cached);
 
   // Post construction init. This _must_ be called after the constructor.
   Status Init();
@@ -130,7 +130,9 @@ class GANDIVA_EXPORT Engine {
   bool functions_loaded_ = false;
   std::shared_ptr<FunctionRegistry> function_registry_;
   std::string module_ir_;
-  std::unique_ptr<llvm::TargetMachine> target_machine_;
+  // The lifetime of the TargetMachine is shared with LLJIT. This prevents 
unnecessary
+  // duplication of this expensive object.
+  std::shared_ptr<llvm::TargetMachine> target_machine_;
   const std::shared_ptr<Configuration> conf_;
 };
 

Reply via email to