jhuber6 created this revision. jhuber6 added reviewers: jdoerfert, JonChesterfield, yaxunl, saiislam, tianshilei1992, tra. Herald added subscribers: kerbowa, guansong, jvesely. Herald added a project: All. jhuber6 requested review of this revision. Herald added subscribers: cfe-commits, sstefan1, MaskRay. Herald added a project: clang.
This patch adds support for OpenMP to use the `--offload-arch` and `--no-offload-arch` options. Traditionally, OpenMP has only supported compiling for a single architecture via the `-Xopenmp-target` option. Now we can pass in a bound architecture and use that if given; otherwise, we default to the value of the `-march` option as before. Note that this only adds basic support; the OpenMP target runtime does not yet know how to choose between multiple architectures. Additionally, other parts of the offloading toolchain (e.g. LTO) require the `-march` option; these should be worked out later. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D124721 Files: clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/test/Driver/amdgpu-openmp-toolchain-new.c clang/test/Driver/openmp-offload-gpu-new.c
Index: clang/test/Driver/openmp-offload-gpu-new.c =================================================================== --- clang/test/Driver/openmp-offload-gpu-new.c +++ clang/test/Driver/openmp-offload-gpu-new.c @@ -10,6 +10,10 @@ // RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ // RUN: | FileCheck %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-arch=sm_52 \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck %s // verify the tools invocations // CHECK: clang{{.*}}"-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" @@ -40,6 +44,15 @@ // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.*]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: 
["[[HOST_BC]]", "[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + // RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR // CHECK-EMIT-LLVM-IR: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm" Index: clang/test/Driver/amdgpu-openmp-toolchain-new.c =================================================================== --- clang/test/Driver/amdgpu-openmp-toolchain-new.c +++ clang/test/Driver/amdgpu-openmp-toolchain-new.c @@ -3,6 +3,9 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ // RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ // RUN: | FileCheck %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ +// RUN: | FileCheck %s // verify the tools invocations // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" @@ -34,6 +37,7 @@ // CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}} // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS // 
CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" // CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.*]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_BC]]"], output: "[[HOST_OBJ:.*]]" Index: clang/lib/Driver/ToolChains/Cuda.cpp =================================================================== --- clang/lib/Driver/ToolChains/Cuda.cpp +++ clang/lib/Driver/ToolChains/Cuda.cpp @@ -847,10 +847,10 @@ if (!llvm::is_contained(*DAL, A)) DAL->append(A); - StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); - if (Arch.empty()) + if (!DAL->hasArg(options::OPT_march_EQ)) DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), - CLANG_OPENMP_NVPTX_DEFAULT_ARCH); + !BoundArch.empty() ? BoundArch + : CLANG_OPENMP_NVPTX_DEFAULT_ARCH); return DAL; } Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -307,9 +307,10 @@ if (!llvm::is_contained(*DAL, A)) DAL->append(A); - std::string Arch = DAL->getLastArgValue(options::OPT_march_EQ).str(); - if (Arch.empty()) { - checkSystemForAMDGPU(Args, *this, Arch); + if (!DAL->hasArg(options::OPT_march_EQ)) { + std::string Arch = BoundArch.str(); + if (BoundArch.empty()) + checkSystemForAMDGPU(Args, *this, Arch); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch); } Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -4215,15 +4215,18 @@ static StringRef getCanonicalArchString(Compilation &C, llvm::opt::DerivedArgList &Args, StringRef ArchStr, - Action::OffloadKind Kind) { - if (Kind == Action::OFK_Cuda) { + Action::OffloadKind Kind, + const ToolChain *TC) { + if 
(Kind == Action::OFK_Cuda || + (Kind == Action::OFK_OpenMP && TC->getTriple().isNVPTX())) { CudaArch Arch = StringToCudaArch(ArchStr); if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) { C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; return StringRef(); } return Args.MakeArgStringRef(CudaArchToString(Arch)); - } else if (Kind == Action::OFK_HIP) { + } else if (Kind == Action::OFK_HIP || + (Kind == Action::OFK_OpenMP && TC->getTriple().isAMDGPU())) { llvm::StringMap<bool> Features; // getHIPOffloadTargetTriple() is known to return valid value as it has // been called successfully in the CreateOffloadingDeviceToolChains(). @@ -4259,11 +4262,7 @@ /// architctures we return a set containing only the empty string. static llvm::DenseSet<StringRef> getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind) { - - // If this is OpenMP offloading we don't use a bound architecture. - if (Kind == Action::OFK_OpenMP) - return llvm::DenseSet<StringRef>{StringRef()}; + Action::OffloadKind Kind, const ToolChain *TC) { // --offload and --offload-arch options are mutually exclusive. 
if (Args.hasArgNoClaim(options::OPT_offload_EQ) && @@ -4279,12 +4278,12 @@ llvm::DenseSet<StringRef> Archs; for (auto &Arg : Args) { if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { - Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { if (Arg->getValue() == StringRef("all")) Archs.clear(); else - Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind)); + Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind, TC)); } } @@ -4300,6 +4299,11 @@ Archs.insert(CudaArchToString(CudaArch::CudaDefault)); else if (Kind == Action::OFK_HIP) Archs.insert(CudaArchToString(CudaArch::HIPDefault)); + else if (Kind == Action::OFK_OpenMP) + Archs.insert(StringRef()); + } else { + Args.ClaimAllArgs(options::OPT_offload_arch_EQ); + Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ); } return Archs; @@ -4345,7 +4349,7 @@ // Get the product of all bound architectures and toolchains. 
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs; for (const ToolChain *TC : ToolChains) - for (StringRef Arch : getOffloadArchs(C, Args, Kind)) + for (StringRef Arch : getOffloadArchs(C, Args, Kind, TC)) TCAndArchs.push_back(std::make_pair(TC, Arch)); for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) @@ -4374,9 +4378,9 @@ HostAction->setCannotBeCollapsedWithNextDependentAction(); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), - /*BoundArch=*/nullptr, Kind); + TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind); + DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction<OffloadAction>(HDep, DDep); } else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) { // The Cuda toolchain uses fatbinary as the linker phase to bundle the
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits