https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/201457

>From cfbd38a76f7255baa0ca2d56e2a70365cb3a17a4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Wed, 3 Jun 2026 16:13:46 -0500
Subject: [PATCH 1/2] [Clang] Set default LTO mode for AMDGCN/SPIR-V targets to
 full

Summary:
Previously we had several layers of if conditions that functionally
amounted to pretending like we were in LTO-mode. The previous changes
moved the LTO settings into the toolchain so we can now override it for
our offloading toolchains. This allows us to respect the LTO mode, where
previously there was no way to override it.

The main artifact of this PR should be trimming up the massive if
statement.

Some slight by-products on the old-driver path, but this can be
recovered with `-fno-offload-lto` and the old driver should be deleted
in a few months anyways.
---
 clang/include/clang/Driver/ToolChain.h        |   4 +-
 clang/lib/Driver/Driver.cpp                   |  97 ++-------
 clang/lib/Driver/ToolChains/AMDGPU.cpp        |   3 +
 clang/lib/Driver/ToolChains/AMDGPU.h          |   7 +-
 clang/lib/Driver/ToolChains/HIPSPV.cpp        |  10 +
 clang/lib/Driver/ToolChains/HIPSPV.h          |   6 +
 .../Driver/amdgpu-openmp-sanitize-options.c   |   6 +-
 clang/test/Driver/amdgpu-openmp-toolchain.c   |   8 +-
 clang/test/Driver/hip-binding.hip             |  25 +--
 clang/test/Driver/hip-device-compile.hip      |  18 +-
 .../test/Driver/hip-offload-compress-zlib.hip |   2 +-
 .../test/Driver/hip-offload-compress-zstd.hip |   2 +-
 clang/test/Driver/hip-phases.hip              | 199 +++++++++---------
 clang/test/Driver/hip-rdc-device-only.hip     |  40 ++--
 clang/test/Driver/hip-sanitize-options.hip    |   4 +-
 clang/test/Driver/hip-save-temps.hip          |   8 +-
 .../test/Driver/hip-spirv-backend-bindings.c  |  11 +-
 clang/test/Driver/hip-spirv-backend-opt.c     |   8 +-
 clang/test/Driver/hip-spirv-backend-phases.c  |  16 +-
 clang/test/Driver/hip-spirv-linker-crash.c    |   2 +-
 clang/test/Driver/hip-target-id.hip           |   7 +-
 .../test/Driver/hip-toolchain-device-only.hip |   2 +-
 clang/test/Driver/hip-toolchain-no-rdc.hip    |  18 +-
 .../hip-toolchain-rdc-flto-partitions.hip     |   2 +-
 .../Driver/hip-toolchain-rdc-separate.hip     |  12 +-
 .../Driver/hip-toolchain-rdc-static-lib.hip   |  12 +-
 clang/test/Driver/hip-toolchain-rdc.hip       |  12 +-
 clang/test/Driver/hip-unbundle-preproc.hipi   |   4 +-
 .../Driver/hipspv-toolchain-rdc-separate.hip  |   8 +-
 clang/test/Driver/hipspv-toolchain-rdc.hip    |  17 +-
 clang/test/Driver/hipspv-toolchain.hip        |   2 +-
 clang/test/Driver/openmp-offload-gpu.c        |   2 +-
 clang/test/Driver/spirv-amd-toolchain.c       |   6 +-
 clang/test/Driver/spirv-openmp-toolchain.c    |   2 +-
 34 files changed, 267 insertions(+), 315 deletions(-)

diff --git a/clang/include/clang/Driver/ToolChain.h 
b/clang/include/clang/Driver/ToolChain.h
index c9051d17850ad..554e0e1ac8b50 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -468,8 +468,8 @@ class ToolChain {
   virtual LTOKind getDefaultLTOMode() const;
 
   /// Resolve the requested LTO mode for this toolchain.
-  LTOKind getLTOMode(const llvm::opt::ArgList &Args,
-                     Action::OffloadKind Kind = Action::OFK_None) const;
+  virtual LTOKind getLTOMode(const llvm::opt::ArgList &Args,
+                             Action::OffloadKind Kind = Action::OFK_None) 
const;
 
   /// Returns true if LTO is active for this toolchain given the args.
   bool isUsingLTO(const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index d1170400f58c2..79f2283f73f48 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5302,17 +5302,6 @@ Action *Driver::ConstructPhaseAction(
     return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
   }
   case phases::Backend: {
-    // Skip a redundant Backend phase for HIP device code when using the new
-    // offload driver, where mid-end is done in linker wrapper. With
-    // -save-temps, we still need the Backend phase to produce optimized IR.
-    if (TargetDeviceOffloadKind == Action::OFK_HIP &&
-        Args.hasFlag(options::OPT_offload_new_driver,
-                     options::OPT_no_offload_new_driver,
-                     C.getActiveOffloadKinds() != Action::OFK_None) &&
-        !offloadDeviceOnly() && !isSaveTempsEnabled() &&
-        !(Args.hasArg(options::OPT_S) && !Args.hasArg(options::OPT_emit_llvm)))
-      return Input;
-
     if (TargetLTOMode != LTOK_None) {
       bool IsDeviceOffload = TargetDeviceOffloadKind != Action::OFK_None;
       if (!IsDeviceOffload) {
@@ -5326,82 +5315,30 @@ Action *Driver::ConstructPhaseAction(
           Output = types::TY_LTO_BC;
         return C.MakeAction<BackendJobAction>(Input, Output);
       }
-      types::ID Output =
-          Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
+      types::ID Output;
+      if (Args.hasArg(options::OPT_emit_llvm)) {
+        Output =
+            Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : 
types::TY_LLVM_BC;
+      } else if (Args.hasArg(options::OPT_S) && offloadDeviceOnly() &&
+                 !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+                               false)) {
+        // For non-RDC device-only compilations with -S, produce real assembly
+        // since the user explicitly requested assembly output.
+        Output = types::TY_PP_Asm;
+      } else if (Args.hasArg(options::OPT_S)) {
+        Output = types::TY_LTO_IR;
+      } else {
+        Output = types::TY_LTO_BC;
+      }
       return C.MakeAction<BackendJobAction>(Input, Output);
     }
-    bool UseSPIRVBackend = Args.hasFlag(options::OPT_use_spirv_backend,
-                                        options::OPT_no_use_spirv_backend,
-                                        /*Default=*/false);
-
-    auto OffloadingToolChain = Input->getOffloadingToolChain();
-    // For AMD SPIRV, if offloadDeviceOnly(), we call the SPIRV backend unless
-    // LLVM bitcode was requested explicitly or RDC is set. If
-    // !offloadDeviceOnly, we emit LLVM bitcode, and clang-linker-wrapper will
-    // compile it to SPIRV.
-    bool UseSPIRVBackendForHipDeviceOnlyNoRDC =
-        TargetDeviceOffloadKind == Action::OFK_HIP && OffloadingToolChain &&
-        OffloadingToolChain->getTriple().isSPIRV() && UseSPIRVBackend &&
-        offloadDeviceOnly() &&
-        !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
-
-    auto &DefaultToolChain = C.getDefaultToolChain();
-    auto DefaultToolChainTriple = DefaultToolChain.getTriple();
-    // For regular C/C++ to AMD SPIRV emit bitcode to avoid spirv-link
-    // dependency, SPIRVAMDToolChain's linker takes care of the generation of
-    // the final SPIRV. The only exception is -S without -emit-llvm to output
-    // textual SPIRV assembly, which fits the default compilation path.
-    bool EmitBitcodeForNonOffloadAMDSPIRV =
-        !OffloadingToolChain && DefaultToolChainTriple.isSPIRV() &&
-        DefaultToolChainTriple.getVendor() == llvm::Triple::VendorType::AMD &&
-        !(Args.hasArg(options::OPT_S) && !Args.hasArg(options::OPT_emit_llvm));
-
     if (Args.hasArg(options::OPT_emit_llvm) ||
-        EmitBitcodeForNonOffloadAMDSPIRV ||
-        TargetDeviceOffloadKind == Action::OFK_SYCL ||
-        (((Input->getOffloadingToolChain() &&
-           Input->getOffloadingToolChain()->getTriple().isAMDGPU() &&
-           TargetDeviceOffloadKind != Action::OFK_None) ||
-          TargetDeviceOffloadKind == Action::OFK_HIP) &&
-         !UseSPIRVBackendForHipDeviceOnlyNoRDC &&
-         ((Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-                        false) ||
-           (Args.hasFlag(options::OPT_offload_new_driver,
-                         options::OPT_no_offload_new_driver,
-                         C.getActiveOffloadKinds() != Action::OFK_None) &&
-            !(Args.hasArg(options::OPT_S) &&
-              !Args.hasArg(options::OPT_emit_llvm)) &&
-            (!offloadDeviceOnly() ||
-             (Input->getOffloadingToolChain() &&
-              TargetDeviceOffloadKind == Action::OFK_HIP &&
-              Input->getOffloadingToolChain()->getTriple().isSPIRV())))) ||
-          TargetDeviceOffloadKind == Action::OFK_OpenMP))) {
+        TargetDeviceOffloadKind == Action::OFK_SYCL) {
       types::ID Output =
-          Args.hasArg(options::OPT_S) &&
-                  (TargetDeviceOffloadKind == Action::OFK_None ||
-                   offloadDeviceOnly() ||
-                   (TargetDeviceOffloadKind == Action::OFK_HIP &&
-                    !Args.hasFlag(options::OPT_offload_new_driver,
-                                  options::OPT_no_offload_new_driver,
-                                  C.getActiveOffloadKinds() !=
-                                      Action::OFK_None)))
-              ? types::TY_LLVM_IR
-              : types::TY_LLVM_BC;
+          Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
     }
 
-    // The SPIRV backend compilation path for HIP must avoid external
-    // dependencies. The default compilation path assembles and links its
-    // output, but the SPIRV assembler and linker are external tools. This code
-    // ensures the backend emits binary SPIRV directly to bypass those steps 
and
-    // avoid failures. Without -save-temps, the compiler may already skip
-    // assembling and linking. With -save-temps, these steps must be explicitly
-    // disabled, as done here. We also force skipping these steps regardless of
-    // -save-temps to avoid relying on optimizations (unless -S is set).
-    // The current HIP bundling expects the type to be types::TY_Image
-    if (UseSPIRVBackendForHipDeviceOnlyNoRDC && !Args.hasArg(options::OPT_S))
-      return C.MakeAction<BackendJobAction>(Input, types::TY_Image);
-
     return C.MakeAction<BackendJobAction>(Input, types::TY_PP_Asm);
   }
   case phases::Assemble:
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index db671e9e59ca2..172ffd8e1f655 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -11,6 +11,7 @@
 #include "clang/Config/config.h"
 #include "clang/Driver/CommonArgs.h"
 #include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/SanitizerArgs.h"
 #include "clang/Options/Options.h"
@@ -710,6 +711,8 @@ Tool *AMDGPUToolChain::buildLinker() const {
   return new tools::amdgpu::Linker(*this);
 }
 
+LTOKind AMDGPUToolChain::getDefaultLTOMode() const { return LTOK_Full; }
+
 DerivedArgList *
 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
                                Action::OffloadKind DeviceOffloadKind) const {
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h 
b/clang/lib/Driver/ToolChains/AMDGPU.h
index 3d291e9e08cb1..161941455837e 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -94,9 +94,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public 
Generic_ELF {
   static bool isWave64(const llvm::opt::ArgList &DriverArgs,
                        llvm::AMDGPU::GPUKind Kind);
   /// Needed for using lto.
-  bool HasNativeLLVMSupport() const override {
-    return true;
-  }
+  bool HasNativeLLVMSupport() const override { return true; }
+
+  /// AMDGPU uses LTO by default to link device bitcode.
+  LTOKind getDefaultLTOMode() const override;
 
   /// Needed for translating LTO options.
   const char *getDefaultLinker() const override { return "ld.lld"; }
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp 
b/clang/lib/Driver/ToolChains/HIPSPV.cpp
index edfb03bd03c84..db15df654e9d4 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.cpp
+++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp
@@ -338,3 +338,13 @@ void HIPSPVToolChain::adjustDebugInfoKind(
   // TODO: Enable debug info when the SPIR-V backend arrives.
   DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
 }
+
+LTOKind HIPSPVToolChain::getLTOMode(const llvm::opt::ArgList &Args,
+                                    Action::OffloadKind Kind) const {
+  // The old offload driver pipeline does not support LTO output types. Only
+  // default to LTO with the new driver.
+  if (!Args.hasFlag(options::OPT_offload_new_driver,
+                    options::OPT_no_offload_new_driver, true))
+    return LTOK_None;
+  return ToolChain::getLTOMode(Args, Kind);
+}
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h 
b/clang/lib/Driver/ToolChains/HIPSPV.h
index f9e11a7fb6977..21b6468c21df2 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.h
+++ b/clang/lib/Driver/ToolChains/HIPSPV.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_HIPSPV_H
 
 #include "SPIRV.h"
+#include "clang/Driver/Driver.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 
@@ -95,6 +96,11 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public 
ToolChain {
   bool isPICDefaultForced() const override { return false; }
   bool SupportsProfiling() const override { return false; }
 
+  LTOKind getDefaultLTOMode() const override { return LTOK_Full; }
+  LTOKind
+  getLTOMode(const llvm::opt::ArgList &Args,
+             Action::OffloadKind Kind = Action::OFK_None) const override;
+
   const ToolChain *HostTC = nullptr;
 
 protected:
diff --git a/clang/test/Driver/amdgpu-openmp-sanitize-options.c 
b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
index 43cf323f45a86..5171963e48e37 100644
--- a/clang/test/Driver/amdgpu-openmp-sanitize-options.c
+++ b/clang/test/Driver/amdgpu-openmp-sanitize-options.c
@@ -104,10 +104,10 @@
 // HOSTSANCOMBINATION: {{"[^"]*clang[^"]*" "-cc1" "-triple" 
"x86_64-unknown-linux-gnu".* "-fopenmp".* 
"-fsanitize=address,fuzzer,fuzzer-no-link".* 
"--offload-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
 // HOSTSANCOMBINATION2: {{"[^"]*clang[^"]*" "-cc1" "-triple" 
"x86_64-unknown-linux-gnu".* "-fopenmp".* 
"-fsanitize=address,fuzzer,fuzzer-no-link,leak".* 
"--offload-targets=amdgcn-amd-amdhsa".* "-x" "c".*}}
 
-// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" 
"-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* 
"-mlink-bitcode-file" "[^"]*asanrtl.bc".* "-mlink-builtin-bitcode" 
"[^"]*ockl.bc".* "-target-cpu" "(gfx908|gfx900|gfx1250|gfx1251)".* "-fopenmp".* 
"-fsanitize=address".* "-x" "c".*}}
-// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" 
"-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc".* "-target-cpu" 
"(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
+// GPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" 
"-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc" "-flto=full" 
"-flto-unit".* "-mlink-bitcode-file" "[^"]*asanrtl.bc".* 
"-mlink-builtin-bitcode" "[^"]*ockl.bc".* "-target-cpu" 
"(gfx908|gfx900|gfx1250|gfx1251)".* "-fopenmp".* "-fsanitize=address".* "-x" 
"c".*}}
+// NOGPUSAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "amdgcn-amd-amdhsa" 
"-aux-triple" "x86_64-unknown-linux-gnu".* "-emit-llvm-bc" "-flto=full" 
"-flto-unit".* "-target-cpu" "(gfx908|gfx900)".* "-fopenmp".* "-x" "c".*}}
 
-// SAN: {{"[^"]*llvm-offload-binary[^"]*" "-o".* 
"--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=(gfx908|gfx1250|gfx1251)(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
+// SAN: {{"[^"]*llvm-offload-binary[^"]*" "-o".* 
"--image=file=.*.o,triple=amdgcn-amd-amdhsa,arch=(gfx908|gfx1250|gfx1251)(:xnack\-|:xnack\+)?,kind=openmp(,feature=(\-xnack|\+xnack))?"}}
 // SAN: {{"[^"]*clang[^"]*" "-cc1" "-triple" "x86_64-unknown-linux-gnu".* 
"-fopenmp".* "-fsanitize=address".* "--offload-targets=amdgcn-amd-amdhsa".* 
"-x" "ir".*}}
 // SAN: {{"[^"]*clang-linker-wrapper[^"]*".* 
"--host-triple=x86_64-unknown-linux-gnu".* "--linker-path=[^"]*".* 
"--whole-archive" 
"[^"]*(libclang_rt.asan_static.a|libclang_rt.asan_static-x86_64.a)".* 
"--whole-archive" "[^"]*(libclang_rt.asan.a|libclang_rt.asan-x86_64.a)".*}}
 
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c 
b/clang/test/Driver/amdgpu-openmp-toolchain.c
index 9203d9271711b..f95f380efd7c2 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -7,9 +7,9 @@
 
 // verify the tools invocations
 // CHECK: "-cc1" "-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
-// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-flto=full" "-flto-unit"{{.*}}"-target-cpu" 
"gfx906"
 // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
-// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out"
+// CHECK: 
clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto=full"{{.*}}
 "-o" "a.out"
 
 // RUN:   %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp 
-fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa 
-march=gfx906 %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PHASES %s
@@ -20,8 +20,8 @@
 // CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp, gfx906)
 // CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp, gfx906)
 // CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, 
"device-openmp (amdgcn-amd-amdhsa:gfx906)" {5}, ir
-// CHECK-PHASES: 7: backend, {6}, ir, (device-openmp, gfx906)
-// CHECK-PHASES: 8: offload, "device-openmp (amdgcn-amd-amdhsa:gfx906)" {7}, ir
+// CHECK-PHASES: 7: backend, {6}, lto-bc, (device-openmp, gfx906)
+// CHECK-PHASES: 8: offload, "device-openmp (amdgcn-amd-amdhsa:gfx906)" {7}, 
lto-bc
 // CHECK-PHASES: 9: llvm-offload-binary, {8}, image, (device-openmp)
 // CHECK-PHASES: 10: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, 
"device-openmp (x86_64-unknown-linux-gnu)" {9}, ir
 // CHECK-PHASES: 11: backend, {10}, assembler, (host-openmp)
diff --git a/clang/test/Driver/hip-binding.hip 
b/clang/test/Driver/hip-binding.hip
index cf44692259257..752e7b96117fe 100644
--- a/clang/test/Driver/hip-binding.hip
+++ b/clang/test/Driver/hip-binding.hip
@@ -4,26 +4,26 @@
 // RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu 
--offload-new-driver \
 // RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 // RUN:   --no-offload-new-driver -c 2>&1 | FileCheck -check-prefix=NRDCS %s
-// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]"
-// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], 
output: "[[IMG1:.*]]"
-// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]"
-// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], 
output: "[[IMG2:.*]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[BC1]]"], 
output: "[[IMG1:.*]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]"
+// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[BC2]]"], 
output: "[[IMG2:.*]]"
 // NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", 
"[[IMG2]]"], output: "[[FATBIN:.*]]"
 // NRDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]", 
"[[FATBIN]]"], output: "{{.*}}"
 
 // RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 // RUN:   --no-offload-new-driver -c -fgpu-rdc 2>&1 | FileCheck 
-check-prefix=RDCS %s
-// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]"
-// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]"
+// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]"
+// RDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]"
 // RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: 
"[[HOSTOBJ:.*o]]"
-// RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", 
"[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}"
+// RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: 
["[[OBJ1]]", "[[OBJ2]]", "[[HOSTOBJ]]"], output: "{{.*}}"
 
 // RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu 
--offload-new-driver \
 // RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 // RUN:   -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS-NEW %s
-// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], 
output: "[[HIP803:.+]]"
-// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: 
"[[HIP900:.+]]"
+// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], 
output: "[[HIP803:.*o]]"
+// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: 
"[[HIP900:.*o]]"
 // RDCS-NEW: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: 
["[[HIP803]]", "[[HIP900]]"], output: "[[HIPFB:.+]]"
 // RDCS-NEW: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", 
"[[HIPFB]]"], output: "{{.*}}"
 
@@ -60,6 +60,7 @@
 // MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: 
["[[GFX908]]"], output: "[[GFX908_OUT:.+]]"
 // MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], 
output: "[[GFX90a:.+]]"
 // MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: 
["[[GFX90a]]"], output: "[[GFX90a_OUT:.+]]"
+// MULTI-D-ONLY-NEXT: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: 
["[[GFX908_OUT]]", "[[GFX90a_OUT]]"], output: "{{.+}}"
 //
 // RUN: not %clang -### --target=x86_64-linux-gnu --offload-new-driver 
-ccc-print-bindings -nogpulib -nogpuinc -emit-llvm \
 // RUN:        --no-gpu-bundle-output --offload-arch=gfx90a 
--offload-arch=gfx908 --offload-device-only -c -o %t %s 2>&1 \
@@ -78,9 +79,9 @@
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver 
-ccc-print-bindings -nogpulib -nogpuinc -emit-llvm \
 // RUN:        --gpu-bundle-output --offload-arch=gfx90a --offload-arch=gfx908 
--offload-device-only -c -o a.out %s 2>&1 \
 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-BC %s
-//      MULTI-D-ONLY-BC: "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[INPUT:.+]]"], output: "[[GFX908_BC:.+]]"
-// MULTI-D-ONLY-BC-NEXT: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], 
output: "[[GFX90A_BC:.+]]"
-// MULTI-D-ONLY-BC-NEXT: "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: 
["[[GFX908_BC]]", "[[GFX90A_BC]]"], output: "a.out"
+//      MULTI-D-ONLY-BC: "amdgcn-amd-amdhsa" - "clang", inputs: 
["[[INPUT:.+]]"], output: "[[GFX908:.+]]"
+// MULTI-D-ONLY-BC-NEXT: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], 
output: "[[GFX90A:.+]]"
+// MULTI-D-ONLY-BC-NEXT: "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: 
["[[GFX908]]", "[[GFX90A]]"], output: "a.out"
 
 //
 // Check to ensure that we can use '-fsyntax-only' for HIP output with the new
diff --git a/clang/test/Driver/hip-device-compile.hip 
b/clang/test/Driver/hip-device-compile.hip
index 1dfadfe8f6513..bf9bf933cf8c6 100644
--- a/clang/test/Driver/hip-device-compile.hip
+++ b/clang/test/Driver/hip-device-compile.hip
@@ -62,7 +62,7 @@
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // BC-SAME: "-emit-llvm-bc"
 // LL-SAME: "-emit-llvm"
-// ASM-NOT: "-emit-llvm"
+// ASM-SAME: "-S"
 // CHECK-SAME: "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: {{".*lib1.bc"}}
@@ -122,11 +122,23 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s
 
-// OBJ: {{"*.clang.*"}} {{.*}} "-emit-obj"
+// OBJ: {{"*.clang.*"}} {{.*}} "-emit-llvm-bc"
 // OBJ-NOT: {{"*.llvm-link"}}
 // OBJ-NOT: {{".*opt"}}
 // OBJ-NOT: {{".*llc"}}
-// OBJ-BUN: {{".*lld.*"}}{{.*}}"-o" "{{.*}}.o"
+// OBJ-BUN: {{".*lld.*"}}{{.*}}"-o" "{{.*}}.out"
 // OBJ-UBUN: {{".*lld.*"}}{{.*}}"-o" "a.o"
 // OBJ-BUN: {{".*clang-offload-bundler"}}{{.*}}"-output=a.o"
 // OBJ-UBUN-NOT: {{".*clang-offload-bundler"}}
+
+// RUN: %clang -S --cuda-device-only -### --target=x86_64-linux-gnu \
+// RUN:   --rocm-path=%S/Inputs/rocm -x hip --cuda-gpu-arch=gfx900 
--no-gpu-bundle-output \
+// RUN:   --no-offload-new-driver -fno-offload-lto \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: 2>&1 | FileCheck -check-prefixes=NOLTO %s
+
+// NOLTO: {{".*clang.*"}} "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// NOLTO-SAME: "-S"
+// NOLTO-NOT: "-flto
+// NOLTO-SAME: "-target-cpu" "gfx900"
+// NOLTO-SAME: "-o" "a-hip-amdgcn-amd-amdhsa-gfx900.s"
diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip 
b/clang/test/Driver/hip-offload-compress-zlib.hip
index f51ab328b5775..df63c527a3e0b 100644
--- a/clang/test/Driver/hip-offload-compress-zlib.hip
+++ b/clang/test/Driver/hip-offload-compress-zlib.hip
@@ -12,7 +12,7 @@
 // RUN:   -o %t.bc \
 // RUN: 2>&1 | FileCheck %s
 
-// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK: clang-offload-bundler{{.*}} -type=o
 // CHECK-SAME: 
-targets={{.*}}hip-amdgcn-amd-amdhsa-unknown-gfx1100,hip-amdgcn-amd-amdhsa-unknown-gfx1101
 // CHECK-SAME: --compress --verbose --compression-level=9
 // CHECK: Compressed bundle format
diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip 
b/clang/test/Driver/hip-offload-compress-zstd.hip
index f91c10f046c2d..bffeb38afebac 100644
--- a/clang/test/Driver/hip-offload-compress-zstd.hip
+++ b/clang/test/Driver/hip-offload-compress-zstd.hip
@@ -12,7 +12,7 @@
 // RUN:   -o %t.bc \
 // RUN: 2>&1 | FileCheck %s
 
-// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK: clang-offload-bundler{{.*}} -type=o
 // CHECK-SAME: 
-targets={{.*}}hip-amdgcn-amd-amdhsa-unknown-gfx1100,hip-amdgcn-amd-amdhsa-unknown-gfx1101
 // CHECK-SAME: --compress --verbose --compression-level=9
 // CHECK: Compressed bundle format
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index be6423af1cd40..1cd05dbf15515 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -8,7 +8,7 @@
 //
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDN %s
+// RUN: | FileCheck -check-prefixes=BIN,OLDN %s
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
 // RUN: | FileCheck -check-prefixes=BIN,NEW,NEWN %s
@@ -18,7 +18,7 @@
 //
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
-// RUN: | FileCheck -check-prefixes=BIN,OLD,OLDR %s
+// RUN: | FileCheck -check-prefixes=BIN,OLDR %s
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
 // RUN: | FileCheck -check-prefixes=BIN,NEW,NEWR %s
@@ -32,34 +32,35 @@
 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH:gfx803]])
 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
-// OLDN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], 
[[ARCH]])
-// OLDN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], 
[[ARCH]])
-// OLDR-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
-// OLD-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
-// OLD-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
-// NEW-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, ir
-// OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
-// NEW-DAG: [[P7:[0-9]+]]: llvm-offload-binary, {[[P6]]}, image, (device-[[T]])
-// NEWN-DAG: [[P8:[0-9]+]]: clang-linker-wrapper, {[[P7]]}, hip-fatbin, 
(device-[[T]])
-// NEWLTO-DAG: [[P8:[0-9]+]]: clang-linker-wrapper, {[[P7]]}, hip-fatbin, 
(device-[[T]])
-// OLDR-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
-
-// OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
-// NEWN-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir
-// NEWLTO-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir
-// NEWR-DAG: [[P8:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P7]]}, ir
-// OLDR-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" 
{[[P10]]}, object
-// OLDN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
-// OLDN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
-// NEWN-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-[[T]])
-// NEWN-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-[[T]])
-// NEWLTO-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, lto-bc, (host-hip)
-// NEWR-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (host-[[T]])
-// NEWR-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (host-[[T]])
-// OLDN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
-// NEWN-DAG: [[P12:[0-9]+]]: clang-linker-wrapper, {[[P11]]}, image, 
(host-[[T]])
-// OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
-// NEWR-DAG: [[P11:[0-9]+]]: clang-linker-wrapper, {[[P10]]}, image, 
(host-[[T]])
+// OLDN-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]])
+// OLDN-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image
+// OLDN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-[[T]])
+// OLDR-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, lto-bc, (device-[[T]], [[ARCH]])
+// OLDR-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH]])
+// OLDR-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P7]]}, image
+// OLDR-DAG: [[P9:[0-9]+]]: linker, {[[P8]]}, object, (device-[[T]])
+// OLDR-DAG: [[P10:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" 
{[[P9]]}, object
+// NEW-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, lto-bc, (device-[[T]], [[ARCH]])
+// NEW-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, lto-bc
+// NEW-DAG: [[P8:[0-9]+]]: llvm-offload-binary, {[[P7]]}, image, (device-[[T]])
+// NEWN-DAG: [[P9:[0-9]+]]: clang-linker-wrapper, {[[P8]]}, hip-fatbin, 
(device-[[T]])
+// NEWLTO-DAG: [[P9:[0-9]+]]: clang-linker-wrapper, {[[P8]]}, hip-fatbin, 
(device-[[T]])
+
+// OLDN-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir
+// NEWN-DAG: [[P10:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P9]]}, ir
+// NEWLTO-DAG: [[P10:[0-9]+]]: offload, "host-[[T]] 
(x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" 
{[[P9]]}, ir
+// NEWR-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P8]]}, ir
+// OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P10]]}, image, (host-[[T]])
+// OLDN-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-[[T]])
+// OLDN-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-[[T]])
+// OLDN-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (host-[[T]])
+// NEWN-DAG: [[P11:[0-9]+]]: backend, {[[P10]]}, assembler, (host-[[T]])
+// NEWN-DAG: [[P12:[0-9]+]]: assembler, {[[P11]]}, object, (host-[[T]])
+// NEWN-DAG: [[P13:[0-9]+]]: clang-linker-wrapper, {[[P12]]}, image, 
(host-[[T]])
+// NEWLTO-DAG: [[P11:[0-9]+]]: backend, {[[P10]]}, lto-bc, (host-hip)
+// NEWR-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-[[T]])
+// NEWR-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-[[T]])
+// NEWR-DAG: [[P12:[0-9]+]]: clang-linker-wrapper, {[[P11]]}, image, 
(host-[[T]])
 
 //
 // Test single gpu architecture up to the assemble phase.
@@ -73,7 +74,7 @@
 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
+// ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, lto-ir, (device-[[T]], [[ARCH]])
 
 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, 
(host-[[T]])
@@ -98,23 +99,19 @@
 // NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH1:gfx803]])
 // NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH1]])
 // NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], 
[[ARCH1]])
-// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], 
[[ARCH1]])
-// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
-// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
-
-// NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH2:gfx900]])
-// NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
-// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], 
[[ARCH2]])
-// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], 
[[ARCH2]])
-// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], 
[[ARCH2]])
-// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
-// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, 
(device-[[T]])
-// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
-// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
-// NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
-// NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
+// NRD2-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH1]])
+// NRD2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, image
+
+// NRD2-DAG: [[P8:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH2:gfx900]])
+// NRD2-DAG: [[P9:[0-9]+]]: preprocessor, {[[P8]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P10:[0-9]+]]: compiler, {[[P9]]}, ir, (device-[[T]], [[ARCH2]])
+// NRD2-DAG: [[P11:[0-9]+]]: linker, {[[P10]]}, image, (device-[[T]], 
[[ARCH2]])
+// NRD2-DAG: [[P12:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P11]]}, image
+// NRD2-DAG: [[P13:[0-9]+]]: linker, {[[P7]], [[P12]]}, hip-fatbin, 
(device-[[T]])
+// NRD2-DAG: [[P14:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" 
{[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P13]]}, ir
+// NRD2-DAG: [[P15:[0-9]+]]: backend, {[[P14]]}, assembler, (host-[[T]])
+// NRD2-DAG: [[P16:[0-9]+]]: assembler, {[[P15]]}, object, (host-[[T]])
+// NCL2-DAG: [[P17:[0-9]+]]: linker, {[[P16]]}, image, (host-[[T]])
 
 //
 // Test two gpu architectures with complete compilation with -fgpu-rdc.
@@ -136,18 +133,18 @@
 // RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH1:gfx803]])
 // RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH1]])
 // RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
-// RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
+// RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, lto-bc, (device-[[T]], 
[[ARCH1]])
 // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]])
 // RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
-// RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir
+// RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, lto-bc
 
 // RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH2:gfx900]])
 // RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
 // RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
-// RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
+// RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, lto-bc, (device-[[T]], 
[[ARCH2]])
 // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], 
[[ARCH2]])
 // RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
-// RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir
+// RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, lto-bc
 
 // RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(host-[[T]])
 // RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(host-[[T]])
@@ -250,12 +247,10 @@
 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
-// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], 
[[ARCH]])
-// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
-// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
-// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
-// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" 
{[[P7]]}, hip-fatbin
+// DBIN-DAG: [[P3:[0-9]+]]: linker, {[[P2]]}, image, (device-[[T]], [[ARCH]])
+// DBIN-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, image
+// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, hip-fatbin, (device-hip, )
+// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" 
{[[P5]]}, hip-fatbin
 // DBIN-NOT: host
 
 //
@@ -294,10 +289,9 @@
 // RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
-// RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], 
[[ARCH]])
+// RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, lto-bc, (device-[[T]], 
[[ARCH]])
 // RELOC-NOT: linker
-// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}
+// RELOC-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}
 // RELOC-NOT: host
 
 //
@@ -315,17 +309,15 @@
 // RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
-// RELOC2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], 
[[ARCH]])
-// RELOC2-NOT: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
-// RELOC2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
-// RELOC2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH2:gfx900]])
-// RELOC2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
-// RELOC2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
-// RELOC2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], 
[[ARCH2]])
-// RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], 
[[ARCH2]])
+// RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, lto-bc, (device-[[T]], 
[[ARCH]])
+// RELOC2-NOT: linker
+// RELOC2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, lto-bc
+// RELOC2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH2:gfx900]])
+// RELOC2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
+// RELOC2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
+// RELOC2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, lto-bc, (device-[[T]], 
[[ARCH2]])
 // RELOC2-NOT: linker
-// RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
+// RELOC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, lto-bc
 // RELOC2-NOT: host
 
 //
@@ -346,19 +338,15 @@
 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
-// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
-// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], 
[[ARCH]])
-// DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
-// DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
-// DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH2:gfx900]])
-// DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
-// DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]])
-// DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], 
[[ARCH2]])
-// DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], 
[[ARCH2]])
-// DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], 
[[ARCH2]])
-// DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image
-// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, 
(device-hip, )
-// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" 
{[[P14]]}, hip-fatbin
+// DBIN2-DAG: [[P3:[0-9]+]]: linker, {[[P2]]}, image, (device-[[T]], [[ARCH]])
+// DBIN2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, image
+// DBIN2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], 
(device-[[T]], [[ARCH2:gfx900]])
+// DBIN2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH2]])
+// DBIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] 
(amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, image
+// DBIN2-DAG: [[P10:[0-9]+]]: linker, {[[P4]], [[P9]]}, hip-fatbin, 
(device-hip, )
+// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" 
{[[P10]]}, hip-fatbin
 // DBIN2-NOT: host
 
 //
@@ -662,15 +650,17 @@
 // LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
 // LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
 // LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
-// LTO-NEXT: 6: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {5}, ir
-// LTO-NEXT: 7: input, "[[INPUT]]", hip, (device-hip, gfx90a)
-// LTO-NEXT: 8: preprocessor, {7}, hip-cpp-output, (device-hip, gfx90a)
-// LTO-NEXT: 9: compiler, {8}, ir, (device-hip, gfx90a)
-// LTO-NEXT: 10: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {9}, ir
-// LTO-NEXT: 11: llvm-offload-binary, {6, 10}, image, (device-hip)
-// LTO-NEXT: 12: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, 
"device-hip (x86_64-unknown-linux-gnu)" {11}, ir
-// LTO-NEXT: 13: backend, {12}, assembler, (host-hip)
-// LTO-NEXT: 14: assembler, {13}, object, (host-hip)
+// LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
+// LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
+// LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
+// LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
+// LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
+// LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
+// LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
+// LTO-NEXT: 13: llvm-offload-binary, {7, 12}, image, (device-hip)
+// LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, 
"device-hip (x86_64-unknown-linux-gnu)" {13}, ir
+// LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
+// LTO-NEXT: 16: assembler, {15}, object, (host-hip)
 
 //
 // Test the new driver when not bundling
@@ -693,15 +683,14 @@
 //      SPIRV-ONLY: 0: input, "[[INPUT:.+]]", hip, (device-hip, gfx1030)
 // SPIRV-ONLY-NEXT: 1: preprocessor, {0}, hip-cpp-output, (device-hip, gfx1030)
 // SPIRV-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx1030)
-// SPIRV-ONLY-NEXT: 3: backend, {2}, assembler, (device-hip, gfx1030)
-// SPIRV-ONLY-NEXT: 4: assembler, {3}, object, (device-hip, gfx1030)
-// SPIRV-ONLY-NEXT: 5: linker, {4}, image, (device-hip, gfx1030)
-// SPIRV-ONLY-NEXT: 6: offload, "device-hip (amdgcn-amd-amdhsa:gfx1030)" {5}, 
image
-// SPIRV-ONLY-NEXT: 7: input, "[[INPUT]]", hip, (device-hip, amdgcnspirv)
-// SPIRV-ONLY-NEXT: 8: preprocessor, {7}, hip-cpp-output, (device-hip, 
amdgcnspirv)
-// SPIRV-ONLY-NEXT: 9: compiler, {8}, ir, (device-hip, amdgcnspirv)
-// SPIRV-ONLY-NEXT: 10: backend, {9}, ir, (device-hip, amdgcnspirv)
-// SPIRV-ONLY-NEXT: 11: linker, {10}, image, (device-hip, amdgcnspirv)
-// SPIRV-ONLY-NEXT: 12: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" 
{11}, image
-// SPIRV-ONLY-NEXT: 13: linker, {6, 12}, hip-fatbin, (device-hip)
-// SPIRV-ONLY-NEXT: 14: offload, "device-hip (amdgcn-amd-amdhsa)" {13}, none
+// SPIRV-ONLY-NEXT: 3: backend, {2}, lto-bc, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 4: linker, {3}, image, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 5: offload, "device-hip (amdgcn-amd-amdhsa:gfx1030)" {4}, 
image
+// SPIRV-ONLY-NEXT: 6: input, "[[INPUT]]", hip, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 7: preprocessor, {6}, hip-cpp-output, (device-hip, 
amdgcnspirv)
+// SPIRV-ONLY-NEXT: 8: compiler, {7}, ir, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 9: backend, {8}, lto-bc, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 10: linker, {9}, image, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 11: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" 
{10}, image
+// SPIRV-ONLY-NEXT: 12: linker, {5, 11}, hip-fatbin, (device-hip)
+// SPIRV-ONLY-NEXT: 13: offload, "device-hip (amdgcn-amd-amdhsa)" {12}, none
diff --git a/clang/test/Driver/hip-rdc-device-only.hip 
b/clang/test/Driver/hip-rdc-device-only.hip
index d74de571e3a32..b946f198160a2 100644
--- a/clang/test/Driver/hip-rdc-device-only.hip
+++ b/clang/test/Driver/hip-rdc-device-only.hip
@@ -70,8 +70,8 @@
 // COMMON-SAME: "-fapply-global-visibility-to-externs"
 // COMMON-SAME: "-target-cpu" "gfx803"
 // COMMON-SAME: "-fgpu-rdc"
-// EMITBC-SAME: {{.*}} "-o" {{".*a.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{".*a.*ll"}} "-x" "hip"
+// EMITBC-SAME: {{.*}} "-o" {{".*a.*\.(bc|o)"}} "-x" "hip"
+// EMITLL-SAME: {{.*}} "-o" {{".*a.*\.(s|ll)"}} "-x" "hip"
 // CHECK-SAME: {{.*}} {{".*a.cu"}}
 
 // COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
@@ -83,13 +83,13 @@
 // COMMON-SAME: "-fapply-global-visibility-to-externs"
 // COMMON-SAME: "-target-cpu" "gfx900"
 // COMMON-SAME: "-fgpu-rdc"
-// EMITBC-SAME: {{.*}} "-o" {{".*a.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{".*a.*ll"}} "-x" "hip"
+// EMITBC-SAME: {{.*}} "-o" {{".*a.*\.(bc|o)"}} "-x" "hip"
+// EMITLL-SAME: {{.*}} "-o" {{".*a.*\.(s|ll)"}} "-x" "hip"
 // COMMON-SAME: {{.*}} {{".*a.cu"}}
 
-// COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll)}}"
+// COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll|o|s)}}"
 // COMMON-SAME: 
"-targets=hip-amdgcn-amd-amdhsa-unknown-gfx803,hip-amdgcn-amd-amdhsa-unknown-gfx900"
-// COMMON-SAME: "-output=a-hip-amdgcn-amd-amdhsa.{{(bc|ll)}}"
+// COMMON-SAME: "-output=a-hip-amdgcn-amd-amdhsa.{{(bc|ll|o|s)}}"
 
 // COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // COMMON-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
@@ -100,8 +100,8 @@
 // COMMON-SAME: "-fapply-global-visibility-to-externs"
 // COMMON-SAME: "-target-cpu" "gfx803"
 // COMMON-SAME: "-fgpu-rdc"
-// EMITBC-SAME: {{.*}} "-o" {{".*b.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{".*b.*ll"}} "-x" "hip"
+// EMITBC-SAME: {{.*}} "-o" {{".*b.*\.(bc|o)"}} "-x" "hip"
+// EMITLL-SAME: {{.*}} "-o" {{".*b.*\.(s|ll)"}} "-x" "hip"
 // COMMON-SAME: {{.*}} {{".*b.hip"}}
 
 // COMMON: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
@@ -113,13 +113,13 @@
 // COMMON-SAME: "-fapply-global-visibility-to-externs"
 // COMMON-SAME: "-target-cpu" "gfx900"
 // COMMON-SAME: "-fgpu-rdc"
-// EMITBC-SAME: {{.*}} "-o" {{".*b.*bc"}} "-x" "hip"
-// EMITLL-SAME: {{.*}} "-o" {{".*b.*ll"}} "-x" "hip"
+// EMITBC-SAME: {{.*}} "-o" {{".*b.*\.(bc|o)"}} "-x" "hip"
+// EMITLL-SAME: {{.*}} "-o" {{".*b.*\.(s|ll)"}} "-x" "hip"
 // COMMON-SAME: {{.*}} {{".*b.hip"}}
 
-// COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll)}}"
+// COMMON: "{{.*}}clang-offload-bundler" "-type={{(bc|ll|o|s)}}"
 // COMMON-SAME: 
"-targets=hip-amdgcn-amd-amdhsa-unknown-gfx803,hip-amdgcn-amd-amdhsa-unknown-gfx900"
-// COMMON-SAME: "-output=b-hip-amdgcn-amd-amdhsa.{{(bc|ll)}}"
+// COMMON-SAME: "-output=b-hip-amdgcn-amd-amdhsa.{{(bc|ll|o|s)}}"
 
 // SAVETEMP: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" 
"-aux-triple" "x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-E"
@@ -132,7 +132,7 @@
 // SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-emit-llvm"
 // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX803_TMP_BC]]
+// SAVETEMP-SAME: {{.*}} "-o" {{"a.*\.s"}} "-x" "ir" [[A_GFX803_TMP_BC]]
 
 // SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-E"
@@ -145,11 +145,11 @@
 // SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-emit-llvm"
 // SAVETEMP-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" {{"a.*.ll"}} "-x" "ir" [[A_GFX900_TMP_BC]]
+// SAVETEMP-SAME: {{.*}} "-o" {{"a.*\.s"}} "-x" "ir" [[A_GFX900_TMP_BC]]
 
-// SAVETEMP: "{{.*}}clang-offload-bundler" "-type=ll"
+// SAVETEMP: "{{.*}}clang-offload-bundler" "-type=s"
 // SAVETEMP-SAME: 
"-targets=hip-amdgcn-amd-amdhsa-unknown-gfx803,hip-amdgcn-amd-amdhsa-unknown-gfx900"
-// SAVETEMP-SAME: "-output=a-hip-amdgcn-amd-amdhsa.ll"
+// SAVETEMP-SAME: "-output=a-hip-amdgcn-amd-amdhsa.s"
 
 // SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-E"
@@ -162,7 +162,7 @@
 // SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-emit-llvm"
 // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" 
"gfx803"
-// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX803_TMP_BC]]
+// SAVETEMP-SAME: {{.*}} "-o" {{"b.*\.s"}} "-x" "ir" [[B_GFX803_TMP_BC]]
 
 // SAVETEMP: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-E"
@@ -175,10 +175,10 @@
 // SAVETEMP-NEXT: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"
 // SAVETEMP-SAME: "-emit-llvm"
 // SAVETEMP-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" 
"gfx900"
-// SAVETEMP-SAME: {{.*}} "-o" {{"b.*.ll"}} "-x" "ir" [[B_GFX900_TMP_BC]]
+// SAVETEMP-SAME: {{.*}} "-o" {{"b.*\.s"}} "-x" "ir" [[B_GFX900_TMP_BC]]
 
-// SAVETEMP: "{{.*}}clang-offload-bundler" "-type=ll"
+// SAVETEMP: "{{.*}}clang-offload-bundler" "-type=s"
 // SAVETEMP-SAME: 
"-targets=hip-amdgcn-amd-amdhsa-unknown-gfx803,hip-amdgcn-amd-amdhsa-unknown-gfx900"
-// SAVETEMP-SAME: "-output=b-hip-amdgcn-amd-amdhsa.ll"
+// SAVETEMP-SAME: "-output=b-hip-amdgcn-amd-amdhsa.s"
 
 // FAIL: error: cannot specify -o when generating multiple output files
diff --git a/clang/test/Driver/hip-sanitize-options.hip 
b/clang/test/Driver/hip-sanitize-options.hip
index a5b06d5cbb26f..e7b315711b216 100644
--- a/clang/test/Driver/hip-sanitize-options.hip
+++ b/clang/test/Driver/hip-sanitize-options.hip
@@ -102,10 +102,10 @@
 // CHECK-NOT: {{"[^"]*lld(\.exe){0,1}".* ".*hip.bc"}}
 // CHECK: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* 
"-fsanitize=address"}}
 
-// NORDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* .* 
"-mlink-bitcode-file" ".*asanrtl.bc".* "-fsanitize=address".*}} "-o" 
"[[OUT:[^"]*.bc]]"
+// NORDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* .* 
"-mlink-bitcode-file" ".*asanrtl.bc".* "-fsanitize=address".*}} "-o" 
"[[OUT:[^"]*]]"
 // NORDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* 
"-fsanitize=address"}}
 
-// RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* 
"-mlink-bitcode-file" ".*asanrtl.bc".* "-fsanitize=address".*}} "-o" 
"[[OUT:[^"]*.bc]]"
+// RDC: {{"[^"]*clang[^"]*".* "-emit-llvm-bc".* "-fcuda-is-device".* 
"-mlink-bitcode-file" ".*asanrtl.bc".* "-fsanitize=address".*}} "-o" 
"[[OUT:[^"]*]]"
 // RDC: {{"[^"]*clang[^"]*".* "-triple" "x86_64-unknown-linux-gnu".* 
"-fsanitize=address"}}
 
 // FAIL: error: cannot find ROCm device library for ABI version 5; provide its 
path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to 
build without ROCm device library
diff --git a/clang/test/Driver/hip-save-temps.hip 
b/clang/test/Driver/hip-save-temps.hip
index 31177f17d3763..6ad0ef6dc4e77 100644
--- a/clang/test/Driver/hip-save-temps.hip
+++ b/clang/test/Driver/hip-save-temps.hip
@@ -39,13 +39,7 @@
 // NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} 
"-disable-llvm-passes" {{.*}} "-o" 
"hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
 // RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} 
"-disable-llvm-passes" {{.*}} "-o" 
"hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.tmp.bc"
 
-// NORDC: {{".*clang.*"}} "-cc1" {{.*}} "-S"
-// NORDC-SAME: "-mllvm" "-amdgpu-internalize-symbols"
-// NORDC-SAME: [[CPU]]
-// NORDC-SAME: "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.s"
-
-// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} 
"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.bc"
-// NORDC: {{".*clang.*"}} "-cc1as" {{.*}} "-filetype" "obj" {{.*}} [[CPU]] 
{{.*}} "-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.o"
+// RDC: {{".*clang.*"}} "-cc1" {{.*}} "-emit-llvm-bc" {{.*}} [[CPU]] {{.*}} 
"-o" "hip-save-temps-hip-amdgcn-amd-amdhsa-gfx900.{{(bc|o)}}"
 // CHECK-NOT: "{{.*}}llvm-link"
 // CHECK-NOT: "{{.*}}opt"
 // CHECK-NOT: "{{.*}}llc"
diff --git a/clang/test/Driver/hip-spirv-backend-bindings.c 
b/clang/test/Driver/hip-spirv-backend-bindings.c
index bd885cdbeef63..fc85f663689c3 100644
--- a/clang/test/Driver/hip-spirv-backend-bindings.c
+++ b/clang/test/Driver/hip-spirv-backend-bindings.c
@@ -10,7 +10,7 @@
 
 // CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]"
 // CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: ["[[HIPI]]"], 
output: "[[SPV_TMP_BC:.+\.tmp\.bc]]"
-// CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[SPV_TMP_BC]]"], output: "[[SPV_BC:.+\.bc]]"
+// CHECK-SPIRV-BASE: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[SPV_TMP_BC]]"], output: "[[SPV_BC:.+\.o]]"
 // CHECK-SPIRV: # "spirv64-amd-amdhsa" - "Offload::Packager", inputs: 
["[[SPV_BC]]"], output: "[[HIP_OUT:.+\.out]]"
 // CHECK-SPIRV: # "spirv64-amd-amdhsa" - "Offload::Linker", inputs: 
["[[HIP_OUT]]"], output: "[[HIPFB:.+\.hipfb]]"
 // CHECK-SPIRV-RDC: # "x86_64-unknown-linux-gnu" - "Offload::Packager", 
inputs: ["[[SPV_BC]]"], output: "[[HIP_OUT:.+\.out]]"
@@ -30,9 +30,8 @@
 // RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY
 
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]"
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]"
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[SPV_BC]]"], output: "[[SPV_OUT:.+\.out]]"
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "AMDGCN::Linker", 
inputs: ["[[SPV_OUT]]"], output: "{{.+\.hipfb}}"
+// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[HIPI]]"], output: "[[SPV_TMP_BC:.+\.bc]]"
+// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY: # "spirv64-amd-amdhsa" - "clang", inputs: 
["[[SPV_TMP_BC]]"], output: "{{.+\.o}}"
 
 // RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu 
--offload-arch=amdgcnspirv \
 // RUN:         -nogpuinc -nogpulib -x hip %s -save-temps \
@@ -40,8 +39,8 @@
 // RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC
 
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", 
inputs: ["[[INPUT:.+]]"], output: "[[HIPI:.+\.hipi]]"
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", 
inputs: ["[[HIPI]]"], output: "[[SPV_BC:.+\.bc]]"
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", 
inputs: ["[[SPV_BC]]"], output: "{{.+}}"
+// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", 
inputs: ["[[HIPI]]"], output: "[[SPV_TMP_BC:.+\.bc]]"
+// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: # "spirv64-amd-amdhsa" - "clang", 
inputs: ["[[SPV_TMP_BC]]"], output: "{{.+\.(o|s)}}"
 
 // RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu 
--offload-arch=amdgcnspirv \
 // RUN:         -nogpuinc -nogpulib -x hip %s -save-temps \
diff --git a/clang/test/Driver/hip-spirv-backend-opt.c 
b/clang/test/Driver/hip-spirv-backend-opt.c
index 10d9a0b01caf3..91fddfb05e8e9 100644
--- a/clang/test/Driver/hip-spirv-backend-opt.c
+++ b/clang/test/Driver/hip-spirv-backend-opt.c
@@ -46,16 +46,10 @@
 // RUN:         -use-spirv-backend -no-canonical-prefixes \
 // RUN: 2>&1 | FileCheck %s 
--check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-CLANG-LINKER-WRAPPER
 
-// RUN: %clang --no-offload-new-driver --target=x86_64-unknown-linux-gnu 
--offload-arch=amdgcnspirv \
-// RUN:         -nogpuinc -nogpulib -### -x hip %s -save-temps  \
-// RUN:         -use-spirv-backend -no-canonical-prefixes \
-// RUN: 2>&1 | FileCheck %s 
--check-prefixes=CHECK-SPIRV-TRANSLATOR,CHECK-SPIRV-BACKEND-BC,CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE
-
 // CHECK-SPIRV-TRANSLATOR-NOT: "{{.*llvm-spirv.*}}"
 // CHECK-SPIRV-BACKEND-TEXTUAL: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-S"
-// CHECK-SPIRV-BACKEND-BINARY: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-obj"
+// CHECK-SPIRV-BACKEND-BINARY: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-llvm-bc"
 // CHECK-SPIRV-BACKEND-BC: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-llvm-bc"
 // CHECK-SPIRV-BACKEND-LL: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-llvm"
-// CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE: "{{.*clang(\.exe)?}}" "-cc1" 
{{.*}}"-triple=spirv64-amd-amdhsa" {{.*}}"-emit-obj"
 // CHECK-FGPU-RDC-SAME: {{.*}} "-fgpu-rdc"
 // CHECK-CLANG-LINKER-WRAPPER: "{{.*}}clang-linker-wrapper" 
"--should-extract=amdgcnspirv" {{.*}} 
"--device-compiler=spirv64-amd-amdhsa=-use-spirv-backend"
diff --git a/clang/test/Driver/hip-spirv-backend-phases.c 
b/clang/test/Driver/hip-spirv-backend-phases.c
index 857b68124d4c2..c75334e8a4db9 100644
--- a/clang/test/Driver/hip-spirv-backend-phases.c
+++ b/clang/test/Driver/hip-spirv-backend-phases.c
@@ -10,8 +10,8 @@
 // CHECK-SPIRV-BINARY: [[P3:[0-9]+]]: input, "[[INPUT]].c", hip, (device-hip, 
amdgcnspirv)
 // CHECK-SPIRV-BINARY: [[P4:[0-9]+]]: preprocessor,  {[[P3]]}, hip-cpp-output, 
(device-hip, amdgcnspirv)
 // CHECK-SPIRV-BINARY: [[P5:[0-9]+]]: compiler,  {[[P4]]}, ir, (device-hip, 
amdgcnspirv)
-// CHECK-SPIRV-BINARY: [[P6:[0-9]+]]: backend,  {[[P5]]}, ir, (device-hip, 
amdgcnspirv)
-// CHECK-SPIRV-BINARY: [[P7:[0-9]+]]: offload,  "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P6]]}, ir
+// CHECK-SPIRV-BINARY: [[P6:[0-9]+]]: backend,  {[[P5]]}, lto-bc, (device-hip, 
amdgcnspirv)
+// CHECK-SPIRV-BINARY: [[P7:[0-9]+]]: offload,  "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P6]]}, lto-bc
 // CHECK-SPIRV-BINARY: [[P8:[0-9]+]]: llvm-offload-binary, {[[P7]]}, image, 
(device-hip)
 // CHECK-SPIRV-BINARY: [[P9:[0-9]+]]: clang-linker-wrapper, {[[P8]]}, 
hip-fatbin, (device-hip)
 
@@ -32,8 +32,8 @@
 // CHECK-SPIRV-BINARY-RDC: [[P3:[0-9]+]]: input, "[[INPUT]].c", hip, 
(device-hip, amdgcnspirv)
 // CHECK-SPIRV-BINARY-RDC: [[P4:[0-9]+]]: preprocessor,  {[[P3]]}, 
hip-cpp-output, (device-hip, amdgcnspirv)
 // CHECK-SPIRV-BINARY-RDC: [[P5:[0-9]+]]: compiler,  {[[P4]]}, ir, 
(device-hip, amdgcnspirv)
-// CHECK-SPIRV-BINARY-RDC: [[P6:[0-9]+]]: backend,  {[[P5]]}, ir, (device-hip, 
amdgcnspirv)
-// CHECK-SPIRV-BINARY-RDC: [[P7:[0-9]+]]: offload,  "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P6]]}, ir
+// CHECK-SPIRV-BINARY-RDC: [[P6:[0-9]+]]: backend,  {[[P5]]}, lto-bc, 
(device-hip, amdgcnspirv)
+// CHECK-SPIRV-BINARY-RDC: [[P7:[0-9]+]]: offload,  "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P6]]}, lto-bc
 // CHECK-SPIRV-BINARY-RDC: [[P8:[0-9]+]]: llvm-offload-binary, {[[P7]]}, 
image, (device-hip)
 
 // CHECK-SPIRV-BINARY-RDC: [[P9:[0-9]+]]: offload, "host-hip 
(x86_64-unknown-linux-gnu)" {[[P2]]}, "device-hip (x86_64-unknown-linux-gnu)" 
{[[P8]]}, ir
@@ -49,10 +49,8 @@
 // CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P0:[0-9]+]]: input, "{{.*}}.c", 
hip, (device-hip, amdgcnspirv)
 // CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P1:[0-9]+]]: preprocessor, 
{[[P0]]}, hip-cpp-output, (device-hip, amdgcnspirv)
 // CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P2:[0-9]+]]: compiler, {[[P1]]}, 
ir, (device-hip, amdgcnspirv)
-// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P3:[0-9]+]]: backend, {[[P2]]}, 
image, (device-hip, amdgcnspirv)
-// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P4:[0-9]+]]: offload, "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, image
-// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P5:[0-9]+]]: linker, {[[P4]]}, 
hip-fatbin, (device-hip)
-// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P6:[0-9]+]]: offload, "device-hip 
(spirv64-amd-amdhsa)" {[[P5]]}, none
+// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P3:[0-9]+]]: backend, {[[P2]]}, 
lto-bc, (device-hip, amdgcnspirv)
+// CHECK-SPIRV-BINARY-OFFLOAD-DEVICE-ONLY: [[P4:[0-9]+]]: offload, "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, none
 
 // RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu 
--offload-arch=amdgcnspirv \
 // RUN:         -nogpuinc -nogpulib -x hip %s -save-temps \ 
@@ -62,7 +60,7 @@
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P0:[0-9]+]]: input, "{{.*}}.c", hip, 
(device-hip, amdgcnspirv)
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, 
hip-cpp-output, (device-hip, amdgcnspirv)
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, 
(device-hip, amdgcnspirv)
-// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, 
(device-hip, amdgcnspirv)
+// CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P3:[0-9]+]]: backend, {[[P2]]}, 
lto-{{(bc|ir)}}, (device-hip, amdgcnspirv)
 // CHECK-SPIRV-OFFLOAD-DEVICE-ONLY-RDC: [[P4:[0-9]+]]: offload, "device-hip 
(spirv64-amd-amdhsa:amdgcnspirv)" {[[P3]]}, none
 
 // RUN: %clang --offload-new-driver --target=x86_64-unknown-linux-gnu 
--offload-arch=amdgcnspirv \
diff --git a/clang/test/Driver/hip-spirv-linker-crash.c 
b/clang/test/Driver/hip-spirv-linker-crash.c
index 3f8835f2b5696..af846a4863685 100644
--- a/clang/test/Driver/hip-spirv-linker-crash.c
+++ b/clang/test/Driver/hip-spirv-linker-crash.c
@@ -12,4 +12,4 @@
 // CHECK: "{{.*}}llvm-link"
 // CHECK-NOT: opt-bisect-limit
 // CHECK-NOT: -mllvm
-// CHECK-SAME: "-o" "{{.*}}.bc" "{{.*}}.bc"{{$}}
+// CHECK-SAME: "-o" "{{.*}}.bc" "{{.*}}.o"{{$}}
diff --git a/clang/test/Driver/hip-target-id.hip 
b/clang/test/Driver/hip-target-id.hip
index fee430fe08c8d..2d4e30991836f 100644
--- a/clang/test/Driver/hip-target-id.hip
+++ b/clang/test/Driver/hip-target-id.hip
@@ -11,7 +11,7 @@
 // RUN:   --offload-arch=gfx908:xnack+:sramecc- \
 // RUN:   --rocm-path=%S/Inputs/rocm \
 // RUN:   --no-offload-new-driver -save-temps \
-// RUN:   %s 2>&1 | FileCheck --check-prefixes=CHECK,TMP %s
+// RUN:   %s 2>&1 | FileCheck --check-prefixes=CHECK %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip \
@@ -26,11 +26,6 @@
 // CHECK-SAME: "-target-feature" "+sramecc"
 // CHECK-SAME: "-target-feature" "+xnack"
 
-// TMP: [[CLANG:"[^"]*clang[^"]*"]] "-cc1as" "-triple" "amdgcn-amd-amdhsa"
-// TMP-SAME: "-target-cpu" "gfx908"
-// TMP-SAME: "-target-feature" "+sramecc"
-// TMP-SAME: "-target-feature" "+xnack"
-
 // CHECK: [[LLD:"[^"]*lld[^"]*"]] {{.*}} "-plugin-opt=mcpu=gfx908"
 // CHECK-SAME: "-plugin-opt=-mattr=+sramecc,+xnack"
 
diff --git a/clang/test/Driver/hip-toolchain-device-only.hip 
b/clang/test/Driver/hip-toolchain-device-only.hip
index c0621854f17ce..6af8c719e8f01 100644
--- a/clang/test/Driver/hip-toolchain-device-only.hip
+++ b/clang/test/Driver/hip-toolchain-device-only.hip
@@ -14,7 +14,7 @@
 // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
 
 // CHECK: [[CLANG:".*clang.*"]] "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-emit-obj"
+// CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: {{.*}} "-o" [[OBJ_DEV_A_900:".*o"]] "-x" "hip"
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip 
b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 859bd6b88ab31..650c4a108e507 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -36,10 +36,11 @@
 // RUN:   %t/a.o %t/b.o \
 // RUN: 2>&1 | FileCheck -check-prefixes=LKONLY %s
 
-// RUN: %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
-// RUN:   --offload-arch=amdgcnspirv --offload-arch=gfx900 \
-// RUN:   %s -nogpuinc -nogpulib \
-// RUN: 2>&1 | FileCheck -check-prefixes=AMDGCNSPIRV %s
+// FIXME: AMDGCNSPIRV + no-offload-new-driver crashes (separate bug).
+// UN: %clang -### --target=x86_64-linux-gnu --no-offload-new-driver \
+// UN:   --offload-arch=amdgcnspirv --offload-arch=gfx900 \
+// UN:   %s -nogpuinc -nogpulib -fno-lto \
+// UN: 2>&1 | FileCheck -check-prefixes=AMDGCNSPIRV %s
 
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver \
 // RUN:   --offload-arch=amdgcnspirv --offload-arch=gfx900 \
@@ -59,8 +60,7 @@
 
 // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// OLD-SAME: "-emit-obj"
-// NEW-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" 
"-amdgpu-internalize-symbols"
 // CHECK-SAME: "-fvisibility=hidden"
@@ -114,7 +114,7 @@
 // NEW-SAME: 
"--image=file=[[OBJ_DEV_A_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"
 
 // NEW: [[WRAPPER:".*clang-linker-wrapper]]"
-// NEW-SAME: "--no-lto"
+// NEW-SAME: "--device-compiler=amdgcn-amd-amdhsa=-flto=full"
 // NEW-SAME: "--host-triple=x86_64-unknown-linux-gnu"
 // NEW-SAME: "--emit-fatbin-only"
 // NEW-SAME: "-o" "[[HIPFB_A:.*.hipfb]]" "[[PACKAGE_A]]"
@@ -188,7 +188,7 @@
 // NEW-SAME: 
"--image=file=[[OBJ_DEV_B_900]],triple=amdgcn-amd-amdhsa,arch=gfx900,kind=hip"
 
 // NEW: [[WRAPPER:".*clang-linker-wrapper]]"
-// NEW-SAME: "--no-lto"
+// NEW-SAME: "--device-compiler=amdgcn-amd-amdhsa=-flto=full"
 // NEW-SAME: "--emit-fatbin-only"
 // NEW-SAME: "-o" "[[HIPFB_B:.*.hipfb]]" "[[PACKAGE_B]]"
 
@@ -229,7 +229,7 @@
 // AMDGCNSPIRV: {{".*clang-offload-bundler.*"}} "-type=o"
 // AMDGCNSPIRV-SAME: 
"-targets={{.*}}hip-spirv64-amd-amdhsa--amdgcnspirv,hip-amdgcn-amd-amdhsa--gfx900"
 // AMDGCNSPIRV-SAME: "-input=[[AMDGCNSPV_CO]]" "-input=[[GFX900_CO]]"
-// AMDGCNSPIRV-NEW: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-emit-llvm-bc" {{.*}} "-o" "[[AMDGCNSPV_BC:.*bc]]"
+// AMDGCNSPIRV-NEW: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-emit-llvm-bc" {{.*}} "-o" "[[AMDGCNSPV_BC:[^"]*]]"
 
 // Check verbose printing with the new driver.
 // RUN: %clang -### --target=x86_64-linux-gnu -fno-gpu-rdc -nogpulib -nogpuinc 
\
diff --git a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip 
b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
index 4439547ea8ad9..455b294a6e277 100644
--- a/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-flto-partitions.hip
@@ -12,7 +12,7 @@
 // FIXED-PARTS: [[LLD: ".*lld.*"]] {{.*}} 
"-plugin-opt=-amdgpu-internalize-symbols"
 // FIXED-PARTS-SAME: "--lto-partitions=42"
 // FIXED-PARTS-SAME: "-plugin-opt=mcpu=gfx803"
-// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*bc}}"
+// FIXED-PARTS-SAME: "-o" "{{.*out}}" "{{.*o}}"
 
 // RUN: not %clang -### --target=x86_64-linux-gnu \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 -flto-partitions=a \
diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip 
b/clang/test/Driver/hip-toolchain-rdc-separate.hip
index 574a23db36a79..b515050b36305 100644
--- a/clang/test/Driver/hip-toolchain-rdc-separate.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip
@@ -12,24 +12,26 @@
 // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" 
"-fvisibility=hidden"
 // CHECK-SAME: "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" "[[A_BC1:.*bc]]" "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" "[[A_BC1:.*o]]" "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" "[[A_BC2:.*bc]]" "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" "[[A_BC2:.*o]]" "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
@@ -47,24 +49,26 @@
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" 
"-fvisibility=hidden"
 // CHECK-SAME: "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" "[[B_BC1:.*bc]]" "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" "[[B_BC1:.*o]]" "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" "[[B_BC2:.*bc]]" "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" "[[B_BC2:.*o]]" "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip 
b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
index 05d276ba57bda..32a45296fcd96 100644
--- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
@@ -27,21 +27,23 @@
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK-NOT: "*.llvm-link"
@@ -56,21 +58,23 @@
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[A_BC2:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[A_BC2:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC2:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[B_BC2:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK-NOT: "*.llvm-link"
diff --git a/clang/test/Driver/hip-toolchain-rdc.hip 
b/clang/test/Driver/hip-toolchain-rdc.hip
index 75d8f889a35a9..414561a8eadb1 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -92,25 +92,27 @@
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" [[HOST:"x86_64-[^"]+"]]
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" 
"-fvisibility=hidden"
 // CHECK-SAME: "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[A_BC1:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" 
"-fvisibility=hidden"
 // CHECK-SAME: "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx803"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[B_BC1:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK-NOT: "*.llvm-link"
@@ -124,22 +126,24 @@
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx900"
-// CHECK-SAME: {{.*}} "-o" [[A_BC2:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[A_BC2:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" [[HOST]]
 // CHECK-SAME: "-emit-llvm-bc"
+// CHECK-SAME: "-flto=full" "-flto-unit"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
 // CHECK-SAME: "-fcuda-is-device"
 // CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
 // CHECK-SAME: "-target-cpu" "gfx900"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: {{.*}} "-o" [[B_BC2:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-o" [[B_BC2:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK-NOT: "*.llvm-link"
diff --git a/clang/test/Driver/hip-unbundle-preproc.hipi 
b/clang/test/Driver/hip-unbundle-preproc.hipi
index a56cafea0e8d4..2cbc0db59062a 100644
--- a/clang/test/Driver/hip-unbundle-preproc.hipi
+++ b/clang/test/Driver/hip-unbundle-preproc.hipi
@@ -7,7 +7,7 @@
 // RUN:   %s 2>&1 | FileCheck %s
 
 // CHECK: {{".*clang-offload-bundler.*"}} {{.*}}"-output=[[HOST_PP:.*hipi]]" 
"-output=[[DEV_PP:.*hipi]]" "-unbundle"
-// CHECK: {{".*clang.*"}} "-cc1" {{.*}}"-target-cpu" "gfx803" {{.*}}"-o" 
"[[DEV_O:.*o]]" {{.*}}"[[DEV_PP]]"
+// CHECK: {{".*clang.*"}} "-cc1" {{.*}}"-target-cpu" "gfx803" {{.*}}"-o" 
"[[DEV_O:[^"]*]]" {{.*}}"[[DEV_PP]]"
 // CHECK: {{".*lld.*"}} {{.*}}"-o" "[[DEV_ISA:.*]]" "[[DEV_O]]"
 // CHECK: {{".*clang-offload-bundler.*"}} {{.*}}"-input={{.*}}" 
"-input=[[DEV_ISA]]" "-output=[[FATBIN:.*]]"
 // CHECK: {{".*clang.*"}} {{.*}}"-triple" "x86_64-unknown-linux-gnu"{{.*}} 
"-fcuda-include-gpubinary" "[[FATBIN]]" {{.*}}"-o" "[[HOST_O:.*o]]" 
{{.*}}"[[HOST_PP]]"
@@ -20,7 +20,7 @@
 // RDC: {{".*clang-offload-bundler.*"}} {{.*}}"-output=[[HOST_PP:.*hipi]]" 
"-output=[[DEV_PP:.*hipi]]" "-unbundle"
 // RDC: {{".*clang.*"}} {{.*}}"-triple" "x86_64-unknown-linux-gnu"{{.*}} "-o" 
"[[HOST_O:.*o]]" {{.*}}"[[HOST_PP]]"
 // RDC: {{".*clang-offload-bundler.*"}} {{.*}}"-output=[[HOST_PP:.*hipi]]" 
"-output=[[DEV_PP:.*hipi]]" "-unbundle"
-// RDC: {{".*clang.*"}} "-cc1" {{.*}}"-target-cpu" "gfx803" {{.*}}"-o" 
"[[DEV_BC:.*bc]]" {{.*}}"[[DEV_PP]]"
+// RDC: {{".*clang.*"}} "-cc1" {{.*}}"-target-cpu" "gfx803" {{.*}}"-o" 
"[[DEV_BC:[^"]*]]" {{.*}}"[[DEV_PP]]"
 // RDC: {{".*lld.*"}} {{.*}}"-o" "[[DEV_ISA:.*]]" "[[DEV_BC]]"
 // RDC: {{".*clang-offload-bundler.*"}} {{.*}}"-input={{.*}}" 
"-input=[[DEV_ISA]]" "-output=[[FATBIN:.*]]"
 // RDC: {{".*clang.*"}} "-o" "[[FATBIN_O:.*o]]"
diff --git a/clang/test/Driver/hipspv-toolchain-rdc-separate.hip 
b/clang/test/Driver/hipspv-toolchain-rdc-separate.hip
index 6e597d69223a4..f4d71bfb8d631 100644
--- a/clang/test/Driver/hipspv-toolchain-rdc-separate.hip
+++ b/clang/test/Driver/hipspv-toolchain-rdc-separate.hip
@@ -15,11 +15,11 @@
 // CHECK-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "-mlink-builtin-bitcode" 
"[[HIP_PATH]]/lib/hip-device-lib/hipspv-spirv64-unknown-chipstar.bc"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: "-o" "[[A_DEV_BC:.*bc]]" "-x" "hip"
+// CHECK-SAME: "-o" "[[A_DEV_OBJ:.*o]]" "-x" "hip"
 // CHECK-SAME: "[[A_SRC:.*a.cu]]"
 
 // CHECK: "{{.*llvm-offload-binary[^ ]*}}" "-o" "[[A_BIN_PACKAGE:.*.out]]"
-// CHECK-SAME: 
"--image=file=[[A_DEV_BC]],triple=spirv64-unknown-chipstar,arch=generic,kind=hip"
+// CHECK-SAME: 
"--image=file=[[A_DEV_OBJ]],triple=spirv64-unknown-chipstar,arch=generic,kind=hip"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "[[HOST_TRIPLE]]"
 // CHECK-SAME: "-aux-triple" "spirv64-unknown-chipstar"
@@ -35,11 +35,11 @@
 // CHECK-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // CHECK-SAME: "-mlink-builtin-bitcode" 
"[[HIP_PATH]]/lib/hip-device-lib/hipspv-spirv64-unknown-chipstar.bc"
 // CHECK-SAME: "-fgpu-rdc"
-// CHECK-SAME: "-o" "[[B_DEV_BC:.*bc]]" "-x" "hip"
+// CHECK-SAME: "-o" "[[B_DEV_OBJ:.*o]]" "-x" "hip"
 // CHECK-SAME: "[[B_SRC:.*b.hip]]"
 
 // CHECK: "{{.*llvm-offload-binary[^ ]*}}" "-o" "[[B_BIN_PACKAGE:.*.out]]"
-// CHECK-SAME: 
"--image=file=[[B_DEV_BC]],triple=spirv64-unknown-chipstar,arch=generic,kind=hip"
+// CHECK-SAME: 
"--image=file=[[B_DEV_OBJ]],triple=spirv64-unknown-chipstar,arch=generic,kind=hip"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "[[HOST_TRIPLE]]"
 // CHECK-SAME: "-aux-triple" "spirv64-unknown-chipstar"
diff --git a/clang/test/Driver/hipspv-toolchain-rdc.hip 
b/clang/test/Driver/hipspv-toolchain-rdc.hip
index 5a3f59a6287b2..2745e61d7b28a 100644
--- a/clang/test/Driver/hipspv-toolchain-rdc.hip
+++ b/clang/test/Driver/hipspv-toolchain-rdc.hip
@@ -32,20 +32,20 @@
 // Emit code (LLVM BC) for device side path.
 // OLD: [[CLANG]] "-cc1" "-triple" "spirv64"
 // OLD-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// OLD-SAME: "-emit-llvm-bc"
+// OLD-SAME: "-emit-obj"
 // OLD-SAME: "-fcuda-is-device"
 // OLD-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // OLD-SAME: "-fgpu-rdc"
-// OLD-SAME: {{.*}} "-o" [[A_BC1:".*bc"]] "-x" "hip"
+// OLD-SAME: {{.*}} "-o" [[A_BC1:".*o"]] "-x" "hip"
 // OLD-SAME: {{.*}} [[A_SRC]]
 
 // OLD: [[CLANG]] "-cc1" "-triple" "spirv64"
 // OLD-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
-// OLD-SAME: "-emit-llvm-bc"
+// OLD-SAME: "-emit-obj"
 // OLD-SAME: "-fcuda-is-device"
 // OLD-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // OLD-SAME: "-fgpu-rdc"
-// OLD-SAME: {{.*}} "-o" [[B_BC1:".*bc"]] "-x" "hip"
+// OLD-SAME: {{.*}} "-o" [[B_BC1:".*o"]] "-x" "hip"
 // OLD-SAME: {{.*}} [[B_SRC]]
 
 // Link device code, lower it with HIPSPV passes and emit SPIR-V binary.
@@ -74,11 +74,11 @@
 // NEW-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // NEW-SAME: "-mlink-builtin-bitcode" 
"[[HIP_PATH]]/lib/hip-device-lib/hipspv-spirv64-unknown-chipstar.bc"
 // NEW-SAME: "-fgpu-rdc"
-// NEW-SAME: "-o" "[[A_DEV_BC:.*bc]]" "-x" "hip"
+// NEW-SAME: "-o" "[[A_DEV_OBJ:.*o]]" "-x" "hip"
 // NEW-SAME: "[[A_SRC:.*a.cu]]"
 
 // NEW: "{{.*llvm-offload-binary[^ ]*}}" "-o" "[[A_BIN_PACKAGE:.*.out]]"
-// NEW-SAME: 
"--image=file=[[A_DEV_BC]],triple=[[OFFLOAD_TRIPLE]],arch=generic,kind=hip"
+// NEW-SAME: 
"--image=file=[[A_DEV_OBJ]],triple=[[OFFLOAD_TRIPLE]],arch=generic,kind=hip"
 
 // NEW: [[CLANG]] "-cc1" "-triple" "[[HOST_TRIPLE]]"
 // NEW-SAME: "-aux-triple" "[[OFFLOAD_TRIPLE]]"
@@ -94,11 +94,11 @@
 // NEW-SAME: "-fvisibility=hidden" "-fapply-global-visibility-to-externs"
 // NEW-SAME: "-mlink-builtin-bitcode" 
"[[HIP_PATH]]/lib/hip-device-lib/hipspv-spirv64-unknown-chipstar.bc"
 // NEW-SAME: "-fgpu-rdc"
-// NEW-SAME: "-o" "[[B_DEV_BC:.*bc]]" "-x" "hip"
+// NEW-SAME: "-o" "[[B_DEV_OBJ:.*o]]" "-x" "hip"
 // NEW-SAME: "[[B_SRC:.*b.hip]]"
 
 // NEW: "{{.*llvm-offload-binary[^ ]*}}" "-o" "[[B_BIN_PACKAGE:.*.out]]"
-// NEW-SAME: 
"--image=file=[[B_DEV_BC]],triple=[[OFFLOAD_TRIPLE]],arch=generic,kind=hip"
+// NEW-SAME: 
"--image=file=[[B_DEV_OBJ]],triple=[[OFFLOAD_TRIPLE]],arch=generic,kind=hip"
 
 // NEW: [[CLANG]] "-cc1" "-triple" "[[HOST_TRIPLE]]"
 // NEW-SAME: "-aux-triple" "[[OFFLOAD_TRIPLE]]"
@@ -110,6 +110,7 @@
 
 // NEW: "{{.*clang-linker-wrapper[^ ]*}}"
 // NEW-SAME: "--device-compiler=[[OFFLOAD_TRIPLE]]=--hip-path=[[HIP_PATH]]"
+// NEW-SAME: "--device-compiler=[[OFFLOAD_TRIPLE]]=-flto=full"
 // NEW-SAME: "--host-triple=[[HOST_TRIPLE]]"
 // NEW-SAME: "-o" "a.out"
 // NEW-SAME: "[[A_HOST_OBJ]]" "[[B_HOST_OBJ]]"
diff --git a/clang/test/Driver/hipspv-toolchain.hip 
b/clang/test/Driver/hipspv-toolchain.hip
index 7a9d58f546c98..64d5d22a11d39 100644
--- a/clang/test/Driver/hipspv-toolchain.hip
+++ b/clang/test/Driver/hipspv-toolchain.hip
@@ -38,7 +38,7 @@
 // NEW-SAME: "--image=file=[[OBJ_DEV]],triple=[[TRIPLE]],arch=generic,kind=hip"
 
 // NEW: {{".*clang-linker-wrapper"}} 
"--device-compiler=[[TRIPLE]]=--hip-path=[[HIP_PATH]]"
-// NEW-SAME: "--no-lto"
+// NEW-SAME: "--device-compiler=[[TRIPLE]]=-flto=full"
 // NEW-SAME: "--emit-fatbin-only" "-o" "[[BUNDLE:.*hipfb]]"
 
 // CHECK: [[CLANG]] "-cc1" "-triple" {{".*"}} "-aux-triple" "[[TRIPLE]]"
diff --git a/clang/test/Driver/openmp-offload-gpu.c 
b/clang/test/Driver/openmp-offload-gpu.c
index bf42ec7572b68..b5644e6ae445f 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -297,7 +297,7 @@
 // RUN:   %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S 
-fopenmp=libomp \
 // RUN:     -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR-BC
-// CHECK-EMIT-LLVM-IR-BC: "-cc1"{{.*}}"-triple" 
"nvptx64-nvidia-cuda"{{.*}}"-emit-llvm-bc"
+// CHECK-EMIT-LLVM-IR-BC: "-cc1"{{.*}}"-triple" 
"nvptx64-nvidia-cuda"{{.*}}"-emit-llvm"
 
 // RUN:   %clang -### -fopenmp=libomp --offload-arch=sm_89 \
 // RUN:          --no-cuda-version-check \
diff --git a/clang/test/Driver/spirv-amd-toolchain.c 
b/clang/test/Driver/spirv-amd-toolchain.c
index c9bba1e437e11..f6a4d4578c140 100644
--- a/clang/test/Driver/spirv-amd-toolchain.c
+++ b/clang/test/Driver/spirv-amd-toolchain.c
@@ -3,7 +3,7 @@
 // PHASES: 0: input, "[[INPUT:.+]]", c
 // PHASES: 1: preprocessor, {0}, cpp-output
 // PHASES: 2: compiler, {1}, ir
-// PHASES: 3: backend, {2}, ir
+// PHASES: 3: backend, {2}, lto-bc
 // PHASES: 4: linker, {3}, image
 
 // RUN: %clang -### -ccc-print-phases -use-spirv-backend 
--target=spirv64-amd-amdhsa %s 2>&1 \
@@ -19,12 +19,12 @@
 
 // RUN: %clang -### --target=spirv64-amd-amdhsa %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=INVOCATION
-// INVOCATION: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-disable-llvm-optzns" {{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
+// INVOCATION: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-flto=full" 
"-flto-unit"{{.*}}"-disable-llvm-optzns" {{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
 // INVOCATION: "{{.*}}llvm-link" "-o" "[[LINKED_OUTPUT:.+]]" "[[OUTPUT]]"
 // INVOCATION: "{{.*}}llvm-spirv" "--spirv-max-version=1.6" "--spirv-ext=+all" 
"--spirv-allow-unknown-intrinsics" "--spirv-lower-const-expr" 
"--spirv-preserve-auxdata" "--spirv-debug-info-version=nonsemantic-shader-200" 
"[[LINKED_OUTPUT]]" "-o" "a.out"
 
 // RUN: %clang -### -use-spirv-backend --target=spirv64-amd-amdhsa %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=INVOCATION-SPIRV-BACKEND
-// INVOCATION-SPIRV-BACKEND: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-disable-llvm-optzns" {{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
+// INVOCATION-SPIRV-BACKEND: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-flto=full" "-flto-unit"{{.*}}"-disable-llvm-optzns" {{.*}} "-o" 
"[[OUTPUT:.+]]" "-x" "c"
 // INVOCATION-SPIRV-BACKEND: "{{.*}}llvm-link" "-o" "[[LINKED_OUTPUT:.+]]" 
"[[OUTPUT]]"
 // INVOCATION-SPIRV-BACKEND: "-cc1" "-triple=spirv64-amd-amdhsa" "-emit-obj" 
{{.*}} "[[LINKED_OUTPUT]]" "-o" "a.out"
diff --git a/clang/test/Driver/spirv-openmp-toolchain.c 
b/clang/test/Driver/spirv-openmp-toolchain.c
index a409ec17c0daf..2c75fffc4d347 100644
--- a/clang/test/Driver/spirv-openmp-toolchain.c
+++ b/clang/test/Driver/spirv-openmp-toolchain.c
@@ -50,7 +50,7 @@
 // CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "Offload::Linker", 
inputs: ["[[HOST_OBJ]]"], output: "a.out"
 
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S 
-fopenmp=libomp -fopenmp-targets=spirv64-intel -nogpulib %s 2>&1 | FileCheck %s 
--check-prefix=CHECK-EMIT-LLVM-IR
-// CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "spirv64-intel"{{.*}}"-emit-llvm-bc"
+// CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "spirv64-intel"{{.*}}"-emit-llvm"
 
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
-fopenmp-targets=spirv64-intel \
 // RUN: --sysroot=%S/Inputs/spirv-openmp/ %s 2>&1 | FileCheck 
--check-prefix=CHECK-GPULIB %s

>From a15fb4b3420e0781058ffae51dad3a1e3ae648d5 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Wed, 3 Jun 2026 18:19:46 -0500
Subject: [PATCH 2/2] Fix noRDC LTO hack thingy

---
 clang/lib/Driver/ToolChains/Clang.cpp      | 4 +++-
 clang/test/Driver/hip-toolchain-no-rdc.hip | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 25bf84a2bde33..63a9472594b76 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9721,7 +9721,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const 
JobAction &JA,
             options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
             options::OPT_fprofile_instr_generate,
             options::OPT_fprofile_instr_generate_EQ);
-        if (TC->getLTOMode(Args, Kind) == LTOK_None && !UsesProfileGenerate)
+        if (!Args.hasArg(options::OPT_foffload_lto_EQ,
+                         options::OPT_fno_offload_lto) &&
+            !UsesProfileGenerate)
           CmdArgs.push_back("--no-lto");
       }
     }
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip 
b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 650c4a108e507..591342df3839c 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -115,6 +115,7 @@
 
 // NEW: [[WRAPPER:".*clang-linker-wrapper]]"
 // NEW-SAME: "--device-compiler=amdgcn-amd-amdhsa=-flto=full"
+// NEW-SAME: "--no-lto"
 // NEW-SAME: "--host-triple=x86_64-unknown-linux-gnu"
 // NEW-SAME: "--emit-fatbin-only"
 // NEW-SAME: "-o" "[[HIPFB_A:.*.hipfb]]" "[[PACKAGE_A]]"
@@ -189,6 +190,7 @@
 
 // NEW: [[WRAPPER:".*clang-linker-wrapper]]"
 // NEW-SAME: "--device-compiler=amdgcn-amd-amdhsa=-flto=full"
+// NEW-SAME: "--no-lto"
 // NEW-SAME: "--emit-fatbin-only"
 // NEW-SAME: "-o" "[[HIPFB_B:.*.hipfb]]" "[[PACKAGE_B]]"
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to