Author: Freddy Ye Date: 2024-05-30T14:47:47+08:00 New Revision: 73f4c2547dc3d1b6a453d3c4388648b122554dd1
URL: https://github.com/llvm/llvm-project/commit/73f4c2547dc3d1b6a453d3c4388648b122554dd1 DIFF: https://github.com/llvm/llvm-project/commit/73f4c2547dc3d1b6a453d3c4388648b122554dd1.diff LOG: [X86] Support EGPR for inline assembly. (#92338) "jR": explicitly enables EGPR "r", "l", "q": enables/disables EGPR w/wo -mapx-inline-asm-use-gpr32 "jr": explicitly enables GPR with -mapx-inline-asm-use-gpr32 -mapx-inline-asm-use-gpr32 will also define a new macro: `__APX_INLINE_ASM_USE_GPR32__` GCC patches: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631183.html https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631186.html [[PATCH v2] x86: Define _APX_INLINE_ASM_USE_GPR32_ (gnu.org)](https://gcc.gnu.org/pipermail/gcc-patches/2024-April/649003.html) Reference: https://gcc.godbolt.org/z/nPPvbY6r4 Added: clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp llvm/test/CodeGen/X86/apx/asm-constraint-jR.ll llvm/test/CodeGen/X86/apx/asm-constraint-jr.ll Modified: clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/Driver/ToolChains/Arch/X86.cpp clang/test/Preprocessor/x86_target_features.c llvm/docs/LangRef.rst llvm/lib/Target/X86/X86.td llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/X86/apx/asm-constraint.ll Removed: ################################################################################ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4119e69c85540..1637a114fcce1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6280,6 +6280,8 @@ def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_F // For stability, we only add a feature to -mapxf after it passes the validation of llvm-test-suite && cpu2017 on Intel SDE. 
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>; def mno_apxf : Flag<["-"], "mno-apxf">, Alias<mno_apx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>; +def mapx_inline_asm_use_gpr32 : Flag<["-"], "mapx-inline-asm-use-gpr32">, Group<m_Group>, + HelpText<"Enable use of GPR32 in inline assembly for APX">; } // let Flags = [TargetSpecific] // VE feature flags diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 08e44360bfbe3..34d249ed27ce5 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -441,6 +441,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasFullBFloat16 = true; } else if (Feature == "+egpr") { HasEGPR = true; + } else if (Feature == "+inline-asm-use-gpr32") { + HasInlineAsmUseGPR32 = true; } else if (Feature == "+push2pop2") { HasPush2Pop2 = true; } else if (Feature == "+ppx") { @@ -963,6 +965,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Condition here is aligned with the feature set of mapxf in Options.td if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD && HasCCMP && HasNF) Builder.defineMacro("__APX_F__"); + if (HasEGPR && HasInlineAsmUseGPR32) + Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__"); // Each case falls through to the previous one here. switch (SSELevel) { @@ -1478,6 +1482,18 @@ bool X86TargetInfo::validateAsmConstraint( case 'C': // SSE floating point constant. case 'G': // x87 floating point constant. return true; + case 'j': + Name++; + switch (*Name) { + default: + return false; + case 'r': + Info.setAllowsRegister(); + return true; + case 'R': + Info.setAllowsRegister(); + return true; + } case '@': // CC condition changes. if (auto Len = matchAsmCCConstraint(Name)) { @@ -1749,6 +1765,20 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { // to the next constraint. 
return std::string("^") + std::string(Constraint++, 2); } + case 'j': + switch (Constraint[1]) { + default: + // Break from inner switch and fall through (copy single char), + // continue parsing after copying the current constraint into + // the return string. + break; + case 'r': + case 'R': + // "^" hints llvm that this is a 2 letter constraint. + // "Constraint++" is used to promote the string iterator + // to the next constraint. + return std::string("^") + std::string(Constraint++, 2); + } [[fallthrough]]; default: return std::string(1, *Constraint); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 0633b7e0da96a..9b2ae87adb2e7 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -172,6 +172,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasCCMP = false; bool HasNF = false; bool HasCF = false; + bool HasInlineAsmUseGPR32 = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 8295d001ec6f7..75f9c99d5d0bf 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -310,4 +310,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, Features.push_back("+prefer-no-gather"); if (Args.hasArg(options::OPT_mno_scatter)) Features.push_back("+prefer-no-scatter"); + if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32)) + Features.push_back("+inline-asm-use-gpr32"); } diff --git a/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp new file mode 100644 index 0000000000000..a45140d96e66c --- /dev/null +++ b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp @@ -0,0 +1,3 @@ +/// Tests -mapx-inline-asm-use-gpr32 +// RUN: %clang -target x86_64-unknown-linux-gnu -c -mapx-inline-asm-use-gpr32 -### %s 2>&1 | FileCheck --check-prefix=GPR32 %s +// GPR32: 
"-target-feature" "+inline-asm-use-gpr32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 6c08b379c9386..3e63e2c77fddf 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -763,3 +763,8 @@ // NF: #define __NF__ 1 // PPX: #define __PPX__ 1 // PUSH2POP2: #define __PUSH2POP2__ 1 + +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=NOUSEGPR32 %s +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=egpr -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s +// NOUSEGPR32-NOT: #define __APX_INLINE_ASM_USE_GPR32__ 1 +// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 7b64c477d13c7..c58f7f7140e47 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5428,10 +5428,12 @@ X86: - ``Z``: An immediate 32-bit unsigned integer. - ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit ``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d`` - registers, and on X86-64, it is all of the integer registers. + registers, and on X86-64, it is all of the integer registers. When feature + `egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32. - ``Q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit ``h`` integer register. This is the ``a``, ``b``, ``c``, and ``d`` registers. -- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. +- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. When feature + `egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32. - ``R``: An 8, 16, 32, or 64-bit "legacy" integer register -- one which has existed since i386, and can be accessed without the REX prefix. 
- ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register. @@ -5452,6 +5454,10 @@ X86: operand will get allocated only to RAX -- if two 32-bit operands are needed, you're better off splitting it yourself, before passing it to the asm statement. +- ``jr``: An 8, 16, 32, or 64-bit integer gpr16. It won't be extended to gpr32 + when feature `egpr` or `inline-asm-use-gpr32` is on. +- ``jR``: An 8, 16, 32, or 64-bit integer gpr32 when feature ``egpr`` is on. + Otherwise, same as ``r``. XCore: diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 7e8133e3e1ac4..628ff560017ed 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -346,6 +346,9 @@ def FeatureNF : SubtargetFeature<"nf", "HasNF", "true", "Support status flags update suppression">; def FeatureCF : SubtargetFeature<"cf", "HasCF", "true", "Support conditional faulting">; +def FeatureUseGPR32InInlineAsm + : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true", + "Enable use of GPR32 in inline assembly for APX">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). 
See "REP String Enhancement" in the Intel Software diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ac30e8846be55..f5d0e1b15d7a3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57840,6 +57840,15 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const { case '2': return C_RegisterClass; } + break; + case 'j': + switch (Constraint[1]) { + default: + break; + case 'r': + case 'R': + return C_RegisterClass; + } } } else if (parseConstraintCode(Constraint) != X86::COND_INVALID) return C_Other; @@ -57919,6 +57928,19 @@ X86TargetLowering::getSingleConstraintMatchWeight( break; } break; + case 'j': + if (StringRef(Constraint).size() != 2) + break; + switch (Constraint[1]) { + default: + return CW_Invalid; + case 'r': + case 'R': + if (CallOperandVal->getType()->isIntegerTy()) + Wt = CW_SpecificReg; + break; + } + break; case 'v': if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) Wt = CW_Register; @@ -58218,6 +58240,10 @@ static bool isVKClass(const TargetRegisterClass &RC) { RC.hasSuperClassEq(&X86::VK64RegClass); } +static bool useEGPRInlineAsm(const X86Subtarget &Subtarget) { + return Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32(); +} + std::pair<unsigned, const TargetRegisterClass *> X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, @@ -58258,13 +58284,21 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. if (Subtarget.is64Bit()) { if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, &X86::GR8_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR8RegClass + : &X86::GR8_NOREX2RegClass); if (VT == MVT::i16) - return std::make_pair(0U, &X86::GR16_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? 
&X86::GR16RegClass + : &X86::GR16_NOREX2RegClass); if (VT == MVT::i32 || VT == MVT::f32) - return std::make_pair(0U, &X86::GR32_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR32RegClass + : &X86::GR32_NOREX2RegClass); if (VT != MVT::f80 && !VT.isVector()) - return std::make_pair(0U, &X86::GR64_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR64RegClass + : &X86::GR64_NOREX2RegClass); break; } [[fallthrough]]; @@ -58283,14 +58317,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'r': // GENERAL_REGS case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, &X86::GR8_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR8RegClass + : &X86::GR8_NOREX2RegClass); if (VT == MVT::i16) - return std::make_pair(0U, &X86::GR16_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR16RegClass + : &X86::GR16_NOREX2RegClass); if (VT == MVT::i32 || VT == MVT::f32 || (!VT.isVector() && !Subtarget.is64Bit())) - return std::make_pair(0U, &X86::GR32_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? &X86::GR32RegClass + : &X86::GR32_NOREX2RegClass); if (VT != MVT::f80 && !VT.isVector()) - return std::make_pair(0U, &X86::GR64_NOREX2RegClass); + return std::make_pair(0U, useEGPRInlineAsm(Subtarget) + ? 
&X86::GR64RegClass + : &X86::GR64_NOREX2RegClass); break; case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) @@ -58514,6 +58556,31 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; } + } else if (Constraint.size() == 2 && Constraint[0] == 'j') { + switch (Constraint[1]) { + default: + break; + case 'r': + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8_NOREX2RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16_NOREX2RegClass); + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, &X86::GR32_NOREX2RegClass); + if (VT != MVT::f80 && !VT.isVector()) + return std::make_pair(0U, &X86::GR64_NOREX2RegClass); + break; + case 'R': + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16RegClass); + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, &X86::GR32RegClass); + if (VT != MVT::f80 && !VT.isVector()) + return std::make_pair(0U, &X86::GR64RegClass); + break; + } } if (parseConstraintCode(Constraint) != X86::COND_INVALID) diff --git a/llvm/test/CodeGen/X86/apx/asm-constraint-jR.ll b/llvm/test/CodeGen/X86/apx/asm-constraint-jR.ll new file mode 100644 index 0000000000000..32b84915c6793 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/asm-constraint-jR.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s +; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s +; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR + +; ERR: error: inline assembly requires more registers than available + +define void @constraint_jR_test() nounwind { +; CHECK-LABEL: constraint_jR_test: +; CHECK: addq %r16, %rax +entry: + %reg = 
alloca i64, align 8 + %0 = load i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} diff --git a/llvm/test/CodeGen/X86/apx/asm-constraint-jr.ll b/llvm/test/CodeGen/X86/apx/asm-constraint-jr.ll new file mode 100644 index 0000000000000..0c6d6a78cfb12 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/asm-constraint-jr.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc -mtriple=x86_64 < %s >%t1 2>%t2 +; RUN: FileCheck %s <%t1 +; RUN: FileCheck %s <%t2 --check-prefix=ERR +; RUN: not llc -mattr=+egpr -mtriple=x86_64 < %s >%t1 2>%t2 +; RUN: FileCheck %s <%t1 +; RUN: FileCheck %s <%t2 --check-prefix=ERR +; RUN: not llc -mattr=+egpr,+inline-asm-use-gpr32 -mtriple=x86_64 < %s >%t1 2>%t2 +; RUN: FileCheck %s <%t1 +; RUN: FileCheck %s <%t2 --check-prefix=ERR + +; CHECK: addq %r8, %rax +define void @constraint_jr_test() nounwind { +entry: + %reg = alloca i64, align 8 + %0 = load i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} + +; ERR: error: inline assembly requires more registers than available +define void @constraint_jr_test_err() nounwind { +entry: + %reg = alloca i64, align 8 + %0 = load i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} diff --git a/llvm/test/CodeGen/X86/apx/asm-constraint.ll b/llvm/test/CodeGen/X86/apx/asm-constraint.ll index 9b81cbf29c25b..114e8152e9757 100644 --- a/llvm/test/CodeGen/X86/apx/asm-constraint.ll +++ b/llvm/test/CodeGen/X86/apx/asm-constraint.ll @@ -1,21 +1,26 @@ ; Check r16-r31 can not be used with 'q','r','l' 
constraint for backward compatibility. -; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+egpr 2>&1 | FileCheck %s +; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s define void @q() { -; CHECK: error: inline assembly requires more registers than available - %a = call i32 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() +; ERR: error: inline assembly requires more registers than available +; CHECK: movq %rax, %r16 + %a = call i64 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() ret void } define void @r() { -; CHECK: error: inline assembly requires more registers than available - %a = call i32 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() +; ERR: error: inline assembly requires more registers than available +; CHECK: movq %rax, %r16 + %a = call i64 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() ret void } define void @l() { -; CHECK: error: inline assembly requires more registers than available - %a = call i32 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() +; ERR: error: inline assembly requires more registers than available +; CHECK: movq %rax, %r16 + %a = call i64 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() ret void } 
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits