yaxunl updated this revision to Diff 265025. yaxunl retitled this revision from "[HIP] Support -offloading-target-id" to "[HIP] Support target id by --offload-arch". yaxunl edited the summary of this revision. yaxunl added a comment.
Rebased the patch and revised it to pass the target id via `--offload-arch`. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D60620/new/ https://reviews.llvm.org/D60620 Files: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Basic/HIP.h clang/lib/Basic/CMakeLists.txt clang/lib/Basic/HIP.cpp clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/HIP.cpp clang/test/Driver/hip-invalid-target-id.hip clang/test/Driver/hip-target-id.hip
Index: clang/test/Driver/hip-target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-target-id.hip @@ -0,0 +1,56 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+xnack+sramecc \ +// RUN: --offload-arch=gfx908+xnack-sramecc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" + +// CHECK: [[OPT:".*opt"]] {{".*-gfx908-linked.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906_BC:".*-gfx908-optimized.*bc"]] + +// CHECK: [[LLC: ".*llc"]] [[OPT_906_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908-.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-mxnack" "-msram-ecc" + +// CHECK: [[OPT]] {{".*-gfx908\+xnack\+sramecc.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XE_BC:".*-gfx908\+xnack\+sramecc.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XE_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908\+xnack\+sramecc.*o"}} + +// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" +// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-emit-llvm-bc" +// CHECK-SAME: {{.*}} "-target-cpu" "gfx908" "-mxnack" "-mno-sram-ecc" + +// CHECK: [[OPT]] {{".*-gfx908\+xnack.*bc"}} "-mtriple=amdgcn-amd-amdhsa" +// 
CHECK-SAME: "-mcpu=gfx908" +// CHECK-SAME: "-o" [[OPT_906XN_BC:".*-gfx908\+xnack.*bc"]] + +// CHECK: [[LLC]] [[OPT_906XN_BC]] +// CHECK-SAME: "-mtriple=amdgcn-amd-amdhsa" +// CHECK-SAME: {{.*}} "-mcpu=gfx908" +// CHECK-SAME: "-o" {{".*-gfx908\+xnack.*o"}} + + +// CHECK: {{".*clang-offload-bundler"}} +// CHECK-SAME: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa-gfx908,hip-amdgcn-amd-amdhsa-gfx908+xnack+sramecc,hip-amdgcn-amd-amdhsa-gfx908+xnack-sramecc" Index: clang/test/Driver/hip-invalid-target-id.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-invalid-target-id.hip @@ -0,0 +1,48 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s + +// NOPLUS: error: Unsupported CUDA gpu architecture + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+sramecc+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=ORDER %s + +// ORDER: error: Invalid HIP offloading target id: gfx908+sramecc+xnack + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+unknown \ +// RUN: --offload-arch=gfx908+sramecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s + +// UNK: error: Invalid HIP offloading target id: gfx908+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx908+sramecc+unknown \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: 
--hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s + +// MIXED: error: Invalid HIP offloading target id: gfx908+sramecc+unknown + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx908 \ +// RUN: --offload-arch=gfx900+xnack \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_dev_lib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=UNSUP %s + +// UNSUP: error: Invalid HIP offloading target id: gfx900+xnack + + Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -11,6 +11,7 @@ #include "CommonArgs.h" #include "InputInfo.h" #include "clang/Basic/Cuda.h" +#include "clang/Basic/HIP.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -117,6 +118,9 @@ Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName) const { + auto Pos = SubArchName.find_first_of("+-"); + if (Pos != SubArchName.npos) + SubArchName = SubArchName.substr(0, Pos); // Construct opt command. ArgStringList OptArgs; // The input to opt is the output from llvm-link. @@ -145,6 +149,9 @@ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, llvm::StringRef OutputFilePrefix, const char *InputFileName, bool OutputIsAsm) const { + auto Pos = SubArchName.find_first_of("+-"); + if (Pos != SubArchName.npos) + SubArchName = SubArchName.substr(0, Pos); // Construct llc command. ArgStringList LlcArgs; // The input to llc is the output from opt. 
@@ -282,6 +289,12 @@ HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + StringRef FeatureStr; + auto Pos = GpuArch.find_first_of("+-"); + if (Pos != GpuArch.npos) { + FeatureStr = GpuArch.drop_front(Pos); + GpuArch = GpuArch.substr(0, Pos); + } assert(!GpuArch.empty() && "Must have an explicit GPU arch."); (void) GpuArch; assert(DeviceOffloadingKind == Action::OFK_HIP && @@ -290,6 +303,38 @@ CC1Args.push_back("-target-cpu"); CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); + + // If target id contains +feature, map it to true. + // If target id contains -feature, map it to false. + // If target id does not contain feature (default), do not map it. + llvm::StringMap<bool> FeatureMap; + while (!FeatureStr.empty()) { + auto Pos = FeatureStr.find_first_of("+-", 1); + if (Pos == FeatureStr.npos) + Pos = FeatureStr.size(); + FeatureMap[FeatureStr.substr(1, Pos - 1)] = FeatureStr.front() == '+'; + FeatureStr = FeatureStr.drop_front(Pos); + } + + // Iterate through all possible target id features for the given GPU. + // If it is mapped to true, pass -mfeature to clang -cc1. + // If it is mapped to false, pass -mno-feature to clang -cc1. + // If it is not in the map (default), do not pass it to clang -cc1. 
+ for (auto Feature : HIP::getAllPossibleTargetIdFeatures(GpuArch)) { + std::string Opt = "-m"; + auto Pos = FeatureMap.find(Feature); + if (Pos == FeatureMap.end()) + continue; + auto FeatureOptName = Feature; + if (Feature == "sramecc") + FeatureOptName = "sram-ecc"; + if (Pos->second) + Opt = Opt + FeatureOptName.str(); + else + Opt = Opt + "no-" + FeatureOptName.str(); + CC1Args.push_back(DriverArgs.MakeArgStringRef(Opt)); + } + CC1Args.push_back("-fcuda-is-device"); if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -45,6 +45,7 @@ #include "ToolChains/TCE.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/HIP.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -2355,8 +2356,20 @@ bool EmitLLVM = false; bool EmitAsm = false; + /// Id to identify each device compilation. For CUDA it is simply the + /// GPU arch string. For HIP it is either the GPU arch string or GPU + /// arch string plus feature strings delimited by a plus sign, e.g. + /// gfx906+xnack. + struct TargetId { + /// Target id string which is persistent throughout the compilation. + const char *Id; + TargetId(CudaArch Arch) { Id = CudaArchToString(Arch); } + TargetId(const char *Id) : Id(Id) {} + operator const char *() { return Id; } + operator StringRef() { return StringRef(Id); } + }; /// List of GPU architectures to use in this compilation. - SmallVector<CudaArch, 4> GpuArchList; + SmallVector<TargetId, 4> GpuArchList; /// The CUDA actions for the current input. 
ActionList CudaDeviceActions; @@ -2439,7 +2452,7 @@ for (auto Arch : GpuArchList) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch), + UA->registerDependentActionInfo(ToolChains[0], Arch, AssociatedOffloadKind); } return ABRT_Success; @@ -2450,10 +2463,9 @@ void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + auto AddTopLevel = [&](Action *A, TargetId TargetId) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), - AssociatedOffloadKind); + Dep.add(*A, *ToolChains.front(), TargetId, AssociatedOffloadKind); AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); }; @@ -2481,6 +2493,8 @@ CudaDeviceActions.clear(); } + virtual bool IsValidOffloadArch(StringRef Arch) const = 0; + bool initialize() override { assert(AssociatedOffloadKind == Action::OFK_Cuda || AssociatedOffloadKind == Action::OFK_HIP); @@ -2528,7 +2542,7 @@ EmitAsm = Args.getLastArg(options::OPT_S); // Collect all cuda_gpu_arch parameters, removing duplicates. - std::set<CudaArch> GpuArchs; + std::set<StringRef> GpuArchs; bool Error = false; for (Arg *A : Args) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || @@ -2542,21 +2556,19 @@ GpuArchs.clear(); continue; } - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + if (!IsValidOffloadArch(ArchStr)) { Error = true; } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(Arch); + GpuArchs.insert(ArchStr); else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(Arch); + GpuArchs.erase(ArchStr); else llvm_unreachable("Unexpected option."); } // Collect list of GPUs remaining in the set. 
- for (CudaArch Arch : GpuArchs) - GpuArchList.push_back(Arch); + for (auto Arch : GpuArchs) + GpuArchList.push_back(Arch.data()); // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if @@ -2578,6 +2590,15 @@ DefaultCudaArch = CudaArch::SM_20; } + bool IsValidOffloadArch(StringRef ArchStr) const override { + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + return true; + } + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2637,8 +2658,7 @@ for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); + DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction<OffloadAction>(DDep, A->getType())); } @@ -2697,6 +2717,47 @@ bool canUseBundlerUnbundler() const override { return true; } + bool IsValidOffloadArch(StringRef IdStr) const override { + // Return a pair of StringRef separated by the first occurrence of '+' + // or '-'. 
+ auto ParseTargetId = [](StringRef S) { + std::pair<StringRef, StringRef> Ret; + auto Pos = S.find_first_of("+-"); + if (Pos == S.npos) { + Ret.first = S; + return Ret; + } + Ret.first = S.substr(0, Pos); + Ret.second = S.drop_front(Pos + 1); + return Ret; + }; + auto Split = ParseTargetId(IdStr); + const StringRef ArchStr = Split.first; + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + return false; + } + StringRef Features = Split.second; + if (Features.empty()) + return true; + auto AllFeatures = HIP::getAllPossibleTargetIdFeatures(ArchStr); + unsigned CurIndex = 0; + while (!Features.empty()) { + auto Splits = ParseTargetId(Features); + for (; CurIndex < AllFeatures.size(); ++CurIndex) { + if (Splits.first == AllFeatures[CurIndex]) + break; + } + if (CurIndex == AllFeatures.size()) { + C.getDriver().Diag(clang::diag::err_drv_hip_bad_target_id) << IdStr; + return false; + } + Features = Splits.second; + } + return true; + }; + ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, @@ -2738,8 +2799,8 @@ // device arch of the next action being propagated to the above link // action. 
OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction<OffloadAction>( DDep, CudaDeviceActions[I]->getType()); } @@ -2795,8 +2856,8 @@ for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, types::TY_Image); - DA.add(*DeviceLinkAction, *ToolChains[0], - CudaArchToString(GpuArchList[I]), AssociatedOffloadKind); + DA.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], + AssociatedOffloadKind); ++I; } } Index: clang/lib/Basic/HIP.cpp =================================================================== --- /dev/null +++ clang/lib/Basic/HIP.cpp @@ -0,0 +1,26 @@ +//===--- HIP.cpp - Utilities for compiling HIP code ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/HIP.h" + +namespace clang { + +namespace HIP { + +const llvm::SmallVector<llvm::StringRef, 4> +getAllPossibleTargetIdFeatures(llvm::StringRef Device) { + llvm::SmallVector<llvm::StringRef, 4> Ret; + if (Device == "gfx902" || Device == "gfx908" || Device == "gfx909" || + Device.startswith("gfx10")) + Ret.push_back("xnack"); + if (Device == "gfx906" || Device == "gfx908" || Device == "gfx909") + Ret.push_back("sramecc"); + return Ret; +} +} // namespace HIP +} // namespace clang Index: clang/lib/Basic/CMakeLists.txt =================================================================== --- clang/lib/Basic/CMakeLists.txt +++ clang/lib/Basic/CMakeLists.txt @@ -48,6 +48,7 @@ FileManager.cpp FileSystemStatCache.cpp FixedPoint.cpp + HIP.cpp IdentifierTable.cpp LangOptions.cpp LangStandards.cpp Index: clang/include/clang/Basic/HIP.h =================================================================== --- /dev/null +++ clang/include/clang/Basic/HIP.h @@ -0,0 +1,30 @@ +//===--- HIP.h - Utilities for compiling HIP code --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_HIP_H +#define LLVM_CLANG_BASIC_HIP_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" + +namespace clang { + +namespace HIP { +/// Get all feature strings that can be used in target id for \m Device. +/// Target id is a device name with optional target id feature strings delimited +/// by a plus or minus sign, e.g. gfx906+xnack-sramecc. Target id feature itself +/// cannot contain plus or minus sign. 
Each device has a limited number of +/// predefined target id features which have to follow a predefined order when +/// showing up in a target id. +const llvm::SmallVector<llvm::StringRef, 4> +getAllPossibleTargetIdFeatures(llvm::StringRef Device); + +} // namespace HIP +} // namespace clang + +#endif Index: clang/include/clang/Basic/DiagnosticDriverKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticDriverKinds.td +++ clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -65,6 +65,7 @@ InGroup<CudaUnknownVersion>; def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">; def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">; +def err_drv_hip_bad_target_id : Error<"Invalid HIP offloading target id: %0">; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error<
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits