LiuChen3 created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
LiuChen3 abandoned this revision.
...alignment, including
struct, union and vector types. For struct/union, there is no problem because they
are aligned
to 4 bytes when passed. For the __m128/__m256/__m512 vector types, va_arg gets
the wrong result.
This patch makes va_arg fetch the argument according to the rules below:
1. When the target supports neither avx nor avx512: get __m128/__m256/__m512
from a 16-byte aligned stack.
2. When the target supports avx: get __m256/__m512 from a 32-byte aligned stack.
3. When the target supports avx512: get __m512 from a 64-byte aligned stack.
Note: The current behavior of clang is inconsistent with the i386 ABI. The
i386 ABI states:
1. If parameters of type __m256 are required to be passed on the stack, the
stack pointer must be aligned on a 0 mod 32 byte boundary at the time of the
call.
2. If parameters of type __m512 are required to be passed on the stack, the
stack pointer must be aligned on a 0 mod 64 byte boundary at the time of the
call.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D78473
Files:
clang/lib/Basic/Targets/X86.h
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/x86_32-align-linux-avx2.c
clang/test/CodeGen/x86_32-align-linux-avx512f.c
clang/test/CodeGen/x86_32-align-linux.c
Index: clang/test/CodeGen/x86_32-align-linux.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+ __m128 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m128);
+ __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+ __m256 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m256);
+ __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+ __m512 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m512);
+ __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux-avx512f.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx512f.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 63
+// CHECK-NEXT: %2 = and i32 %1, -64
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+ __m512 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m512);
+ __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux-avx2.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx2.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+ __m128 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m128);
+ __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 31
+// CHECK-NEXT: %2 = and i32 %1, -32
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+ __m256 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m256);
+ __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 31
+// CHECK-NEXT: %2 = and i32 %1, -32
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+ __m512 res;
+ __builtin_va_list args;
+ __builtin_va_start(args, argCount);
+ res = __builtin_va_arg(args, __m512);
+ __builtin_va_end(args);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1023,6 +1023,9 @@
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
+/// The AVX ABI level for X86 targets.
+enum class X86AVXABILevel { None, AVX, AVX512 };
+
/// Similar to llvm::CCState, but for Clang.
struct CCState {
CCState(CGFunctionInfo &FI)
@@ -1053,7 +1056,9 @@
bool IsWin32StructABI;
bool IsSoftFloatABI;
bool IsMCUABI;
+ bool IsLinuxABI;
unsigned DefaultNumRegisterParameters;
+ X86AVXABILevel AVXLevel;
static bool isRegisterSize(unsigned Size) {
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
@@ -1112,13 +1117,15 @@
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
- unsigned NumRegisterParameters, bool SoftFloatABI)
- : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
- IsRetSmallStructInRegABI(RetSmallStructInRegABI),
- IsWin32StructABI(Win32StructABI),
- IsSoftFloatABI(SoftFloatABI),
- IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
- DefaultNumRegisterParameters(NumRegisterParameters) {}
+ unsigned NumRegisterParameters, bool SoftFloatABI,
+ X86AVXABILevel AVXLevel)
+ : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
+ IsRetSmallStructInRegABI(RetSmallStructInRegABI),
+ IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
+ IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+ IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
+ DefaultNumRegisterParameters(NumRegisterParameters),
+ AVXLevel(AVXLevel) {}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
@@ -1139,10 +1146,11 @@
public:
X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
- unsigned NumRegisterParameters, bool SoftFloatABI)
+ unsigned NumRegisterParameters, bool SoftFloatABI,
+ X86AVXABILevel AVXLevel)
: TargetCodeGenInfo(new X86_32ABIInfo(
CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
- NumRegisterParameters, SoftFloatABI)) {}
+ NumRegisterParameters, SoftFloatABI, AVXLevel)) {}
static bool isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts);
@@ -1538,6 +1546,29 @@
if (Align <= MinABIStackAlignInBytes)
return 0; // Use default alignment.
+ if (IsLinuxABI) {
+ // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
+ // want to spend any effort dealing with the ramifications of ABI breaks.
+ // If the target doesn't support avx, return 16.
+ // If the target supports avx or avx512, __m256 will align to 32 bytes.
+ // __m512 will align to 64 bytes when the target supports avx512, align to
+ // 32 bytes when the target supports avx and 16 for the other.
+ if (Ty->getAs<VectorType>()) {
+ int TypeSize = getContext().getTypeSize(Ty);
+ if (TypeSize == 128)
+ return Align;
+ else if (TypeSize == 256)
+ return (AVXLevel == X86AVXABILevel::AVX ||
+ AVXLevel == X86AVXABILevel::AVX512)
+ ? Align
+ : 16;
+ else
+ return AVXLevel == X86AVXABILevel::AVX512
+ ? Align
+ : AVXLevel == X86AVXABILevel::AVX ? 32 : 16;
+ } else
+ return MinABIStackAlignInBytes;
+ }
// On non-Darwin, the stack type alignment is always 4.
if (!IsDarwinVectorABI) {
// Set explicit alignment, since we may need to realign the top.
@@ -2086,12 +2117,6 @@
namespace {
-/// The AVX ABI level for X86 targets.
-enum class X86AVXABILevel {
- None,
- AVX,
- AVX512
-};
/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
@@ -2432,11 +2457,12 @@
class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
public:
- WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
- bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
- unsigned NumRegisterParameters)
- : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
- Win32StructABI, NumRegisterParameters, false) {}
+ WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
+ bool RetSmallStructInRegABI, bool Win32StructABI,
+ unsigned NumRegisterParameters)
+ : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
+ Win32StructABI, NumRegisterParameters, false,
+ X86AVXABILevel::None) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
@@ -10327,6 +10353,12 @@
X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
+ StringRef ABI = getTarget().getABI();
+ X86AVXABILevel AVXLevel =
+ (ABI == "avx512"
+ ? X86AVXABILevel::AVX512
+ : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
+
if (Triple.getOS() == llvm::Triple::Win32) {
return SetCGInfo(new WinX86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
@@ -10335,7 +10367,7 @@
return SetCGInfo(new X86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
- CodeGenOpts.FloatABI == "soft"));
+ CodeGenOpts.FloatABI == "soft", AVXLevel));
}
}
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -302,12 +302,14 @@
DiagnosticsEngine &Diags) override;
StringRef getABI() const override {
- if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F)
+ llvm::Triple::ArchType Arch = getTriple().getArch();
+ if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) &&
+ SSELevel >= AVX512F)
return "avx512";
- if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
+ if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) &&
+ SSELevel >= AVX)
return "avx";
- if (getTriple().getArch() == llvm::Triple::x86 &&
- MMX3DNowLevel == NoMMX3DNow)
+ if (Arch == llvm::Triple::x86 && MMX3DNowLevel == NoMMX3DNow)
return "no-mmx";
return "";
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits