r362196 - [X86] Add VP2INTERSECT instructions
Author: pengfei Date: Thu May 30 23:09:35 2019 New Revision: 362196 URL: http://llvm.org/viewvc/llvm-project?rev=362196&view=rev Log: [X86] Add VP2INTERSECT instructions Support intel AVX512 VP2INTERSECT instructions in clang Patch by Xiang Zhang (xiangzhangllvm) Differential Revision: https://reviews.llvm.org/D62367 Added: cfe/trunk/lib/Headers/avx512vlvp2intersectintrin.h cfe/trunk/lib/Headers/avx512vp2intersectintrin.h cfe/trunk/test/CodeGen/intel-avx512vlvp2intersect.c cfe/trunk/test/CodeGen/intel-avx512vp2intersect.c Modified: cfe/trunk/docs/ClangCommandLineReference.rst cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/Basic/Targets/X86.h cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/immintrin.h cfe/trunk/test/CodeGen/attr-target-x86.c cfe/trunk/test/Driver/x86-target-features.c cfe/trunk/test/Preprocessor/x86_target_features.c Modified: cfe/trunk/docs/ClangCommandLineReference.rst URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/ClangCommandLineReference.rst?rev=362196&r1=362195&r2=362196&view=diff == --- cfe/trunk/docs/ClangCommandLineReference.rst (original) +++ cfe/trunk/docs/ClangCommandLineReference.rst Thu May 30 23:09:35 2019 @@ -2639,6 +2639,8 @@ X86 .. option:: -mavx512vnni, -mno-avx512vnni +.. option:: -mavx512vp2intersect, -mno-avx512vp2intersect + .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq .. 
option:: -mbmi, -mno-bmi Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=362196&r1=362195&r2=362196&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu May 30 23:09:35 2019 @@ -1840,6 +1840,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtneps2bf TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8LLiV8LLiUc*Uc*", "nV:512:", "avx512vp2intersect") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4LLiV4LLiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2LLiV2LLiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") // generic select intrinsics TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=362196&r1=362195&r2=362196&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Thu May 30 23:09:35 2019 @@ -2894,6 +2894,8 @@ def mavx512vnni : Flag<["-"], "mavx512vn def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group; def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group; def mno_avx512vpopcntdq : 
Flag<["-"], "mno-avx512vpopcntdq">, Group; +def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group; +def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group; def madx : Flag<["-"], "madx">, Group; def mno_adx : Flag<["-"], "mno-adx">, Group; def maes : Flag<["-"], "maes">, Group; Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=362196&r1=362195&r2=362196&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu May 30 23:09:35 2019 @@ -524,6 +524,7 @@ void X86TargetInfo::setSSELevel(llvm::St Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; Features["avx512bitalg"] = Features["avx512vnni"] = false; Features["avx512vbmi2"] = Features["avx512bf16"] = false; +Features["avx512vp2intersect"] = false; break; } } @@ -774,6 +775,8 @@ bool X86TargetInfo::handleTargetFeatures HasAVX512VBMI2 = true; } else if (Feature == "+avx512ifma") { HasAVX512IFMA = true; +} else if (Feature == "+av
r361934 - [X86] Fix i386 struct and union parameter alignment
Author: pengfei Date: Wed May 29 01:42:35 2019 New Revision: 361934 URL: http://llvm.org/viewvc/llvm-project?rev=361934&view=rev Log: [X86] Fix i386 struct and union parameter alignment According to i386 System V ABI 2.1: Structures and unions assume the alignment of their most strictly aligned component. But the current implementation always treats them as 4-byte aligned, which results in incorrect code, e.g.: 1 #include 2 typedef union { 3 int d[4]; 4 __m128 m; 5 } M128; 6 extern void foo(int, ...); 7 void test(void) 8 { 9 M128 a; 10 foo(1, a); 11 foo(1, a.m); 12 } The first call (line 10) takes the second arg as 4-byte aligned while the second call (line 11) takes the second arg as 16-byte aligned. The two calls therefore disagree on the alignment, even though it should be the same. This patch fixes the bug by following the i386 System V ABI, and applies the fix to Linux only, since other System V OSes (e.g. Darwin, PS4 and FreeBSD) don't want to spend any effort dealing with the ramifications of ABI breaks at present. 
Patch by Wei Xiao (wxiao3) Differential Revision: https://reviews.llvm.org/D60748 Added: cfe/trunk/test/CodeGen/x86_32-align-linux.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/test/CodeGen/x86_32-arguments-linux.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=361934&r1=361933&r2=361934&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Wed May 29 01:42:35 2019 @@ -1010,6 +1010,7 @@ class X86_32ABIInfo : public SwiftABIInf bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; + bool IsLinuxABI; unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { @@ -1076,6 +1077,7 @@ public: IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), + IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()), DefaultNumRegisterParameters(NumRegisterParameters) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, @@ -1492,8 +1494,15 @@ unsigned X86_32ABIInfo::getTypeStackAlig if (Align <= MinABIStackAlignInBytes) return 0; // Use default alignment. - // On non-Darwin, the stack type alignment is always 4. - if (!IsDarwinVectorABI) { + if (IsLinuxABI) { +// i386 System V ABI 2.1: Structures and unions assume the alignment of their +// most strictly aligned component. +// +// Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't +// want to spend any effort dealing with the ramifications of ABI breaks. +return Align; + } else if (!IsDarwinVectorABI) { +// On non-Darwin and non-Linux, the stack type alignment is always 4. // Set explicit alignment, since we may need to realign the top. 
return MinABIStackAlignInBytes; } Added: cfe/trunk/test/CodeGen/x86_32-align-linux.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_32-align-linux.c?rev=361934&view=auto == --- cfe/trunk/test/CodeGen/x86_32-align-linux.c (added) +++ cfe/trunk/test/CodeGen/x86_32-align-linux.c Wed May 29 01:42:35 2019 @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s +// RUN: FileCheck < %t %s + +#include + +typedef union { +int d[4]; +__m128 m; +} M128; + +extern void foo(int, ...); + +M128 a; + +// CHECK-LABEL: define void @test +// CHECK: entry: +// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval align 16 +// CHECK: call void (i32, ...) @foo(i32 1, <4 x float> + +void test(void) +{ + foo(1, a); + foo(1, a.m); +} + Modified: cfe/trunk/test/CodeGen/x86_32-arguments-linux.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_32-arguments-linux.c?rev=361934&r1=361933&r2=361934&view=diff == --- cfe/trunk/test/CodeGen/x86_32-arguments-linux.c (original) +++ cfe/trunk/test/CodeGen/x86_32-arguments-linux.c Wed May 29 01:42:35 2019 @@ -3,21 +3,21 @@ // CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, -// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, -// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4, -// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4, -// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4, -// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4, -// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4) +// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 8 %a3, +// CHECK: <1 x double> %a4, %struct.s56_2* byval align 8 %a5, +// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7, +// CHECK: <2 x double> %a8, %struct.
r362186 - Revert "[X86] Fix i386 struct and union parameter alignment"
Author: pengfei Date: Thu May 30 18:50:07 2019 New Revision: 362186 URL: http://llvm.org/viewvc/llvm-project?rev=362186&view=rev Log: Revert "[X86] Fix i386 struct and union parameter alignment" This reverts commit d61cb749f4ac2c90244906d756e80a5c4a7ffa89 (SVN: 361934). Reverted at James's suggestion. For the discussion, please refer to: https://reviews.llvm.org/D60748 Removed: cfe/trunk/test/CodeGen/x86_32-align-linux.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/test/CodeGen/x86_32-arguments-linux.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=362186&r1=362185&r2=362186&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu May 30 18:50:07 2019 @@ -1010,7 +1010,6 @@ class X86_32ABIInfo : public SwiftABIInf bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; - bool IsLinuxABI; unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { @@ -1077,7 +1076,6 @@ public: IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()), DefaultNumRegisterParameters(NumRegisterParameters) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, @@ -1494,15 +1492,8 @@ unsigned X86_32ABIInfo::getTypeStackAlig if (Align <= MinABIStackAlignInBytes) return 0; // Use default alignment. - if (IsLinuxABI) { -// i386 System V ABI 2.1: Structures and unions assume the alignment of their -// most strictly aligned component. -// -// Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't -// want to spend any effort dealing with the ramifications of ABI breaks. -return Align; - } else if (!IsDarwinVectorABI) { -// On non-Darwin and non-Linux, the stack type alignment is always 4. + // On non-Darwin, the stack type alignment is always 4. 
+ if (!IsDarwinVectorABI) { // Set explicit alignment, since we may need to realign the top. return MinABIStackAlignInBytes; } Removed: cfe/trunk/test/CodeGen/x86_32-align-linux.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_32-align-linux.c?rev=362185&view=auto == --- cfe/trunk/test/CodeGen/x86_32-align-linux.c (original) +++ cfe/trunk/test/CodeGen/x86_32-align-linux.c (removed) @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s -// RUN: FileCheck < %t %s - -#include - -typedef union { -int d[4]; -__m128 m; -} M128; - -extern void foo(int, ...); - -M128 a; - -// CHECK-LABEL: define void @test -// CHECK: entry: -// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval align 16 -// CHECK: call void (i32, ...) @foo(i32 1, <4 x float> - -void test(void) -{ - foo(1, a); - foo(1, a.m); -} - Modified: cfe/trunk/test/CodeGen/x86_32-arguments-linux.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86_32-arguments-linux.c?rev=362186&r1=362185&r2=362186&view=diff == --- cfe/trunk/test/CodeGen/x86_32-arguments-linux.c (original) +++ cfe/trunk/test/CodeGen/x86_32-arguments-linux.c Thu May 30 18:50:07 2019 @@ -3,21 +3,21 @@ // CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, -// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 8 %a3, -// CHECK: <1 x double> %a4, %struct.s56_2* byval align 8 %a5, -// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7, -// CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9, -// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 32 %a11, -// CHECK: <4 x double> %a12, %struct.s56_6* byval align 32 %a13) +// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, +// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4, +// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4, +// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4, +// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4, +// 
CHECK: <4 x double> %a12, %struct.s56_6* byval align 4) // CHECK: call void (i32, ...) @f56_0(i32 1, // CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}}, -// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 8 %{{[^ ]*}}, -// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 8 %{{[^ ]*}}, -// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}}, -// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}}, -// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 32 %{{[^ ]*}}, -// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 32 %{{[^ ]*}}) +// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %
r362685 - [X86] Add ENQCMD instructions
Author: pengfei Date: Thu Jun 6 01:28:42 2019 New Revision: 362685 URL: http://llvm.org/viewvc/llvm-project?rev=362685&view=rev Log: [X86] Add ENQCMD instructions For more details about these instructions, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference. Patch by Tianqing Wang (tianqing) Differential Revision: https://reviews.llvm.org/D62282 Added: cfe/trunk/lib/Headers/enqcmdintrin.h cfe/trunk/test/CodeGen/x86-enqcmd-builtins.c Modified: cfe/trunk/docs/ClangCommandLineReference.rst cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/include/clang/Driver/Options.td cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/Basic/Targets/X86.h cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/cpuid.h cfe/trunk/lib/Headers/immintrin.h cfe/trunk/test/Driver/x86-target-features.c cfe/trunk/test/Preprocessor/x86_target_features.c Modified: cfe/trunk/docs/ClangCommandLineReference.rst URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/ClangCommandLineReference.rst?rev=362685&r1=362684&r2=362685&view=diff == --- cfe/trunk/docs/ClangCommandLineReference.rst (original) +++ cfe/trunk/docs/ClangCommandLineReference.rst Thu Jun 6 01:28:42 2019 @@ -2657,6 +2657,8 @@ X86 .. option:: -mcx16, -mno-cx16 +.. option:: -menqcmd, -mno-enqcmd + .. option:: -mf16c, -mno-f16c .. 
option:: -mfma, -mno-fma Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=362685&r1=362684&r2=362685&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 6 01:28:42 2019 @@ -1894,6 +1894,10 @@ TARGET_BUILTIN(__builtin_ia32_ptwrite32, // INVPCID TARGET_BUILTIN(__builtin_ia32_invpcid, "vUiv*", "nc", "invpcid") +// ENQCMD +TARGET_BUILTIN(__builtin_ia32_enqcmd, "Ucv*vC*", "n", "enqcmd") +TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd") + // MSVC TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") Modified: cfe/trunk/include/clang/Driver/Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=362685&r1=362684&r2=362685&view=diff == --- cfe/trunk/include/clang/Driver/Options.td (original) +++ cfe/trunk/include/clang/Driver/Options.td Thu Jun 6 01:28:42 2019 @@ -2916,6 +2916,8 @@ def mclzero : Flag<["-"], "mclzero">, Gr def mno_clzero : Flag<["-"], "mno-clzero">, Group; def mcx16 : Flag<["-"], "mcx16">, Group; def mno_cx16 : Flag<["-"], "mno-cx16">, Group; +def menqcmd : Flag<["-"], "menqcmd">, Group; +def mno_enqcmd : Flag<["-"], "mno-enqcmd">, Group; def mf16c : Flag<["-"], "mf16c">, Group; def mno_f16c : Flag<["-"], "mno-f16c">, Group; def mfma : Flag<["-"], "mfma">, Group; Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=362685&r1=362684&r2=362685&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu Jun 6 01:28:42 2019 @@ -835,6 +835,8 @@ bool X86TargetInfo::handleTargetFeatures HasPTWRITE = true; } else if (Feature == "+invpcid") { HasINVPCID = true; +} else if (Feature == 
"+enqcmd") { + HasENQCMD = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -1218,6 +1220,8 @@ void X86TargetInfo::getTargetDefines(con Builder.defineMacro("__PTWRITE__"); if (HasINVPCID) Builder.defineMacro("__INVPCID__"); + if (HasENQCMD) +Builder.defineMacro("__ENQCMD__"); // Each case falls through to the previous one here. switch (SSELevel) { @@ -1334,6 +1338,7 @@ bool X86TargetInfo::isValidFeatureName(S .Case("clwb", true) .Case("clzero", true) .Case("cx16", true) + .Case("enqcmd", true) .Case("f16c", true) .Case("fma", true) .Case("fma4", true) @@ -1415,6 +1420,7 @@ bool X86TargetInfo::hasFeature(StringRef .Case("clzero", HasCLZERO) .Case("cx8", HasCX8) .Case("cx16", HasCX16) + .Case("enqcmd", HasENQCMD) .Case("f16c", HasF16C) .Case("fma", HasFMA) .Case("fma4", XOPLevel >= FMA4) Modified: cfe/trunk/lib/Basic/Targets/X86.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.h?rev=362685&r1=362684&r2=362685&view=diff == --- cfe/trunk/
r362781 - [X86] -march=cooperlake (clang)
Author: pengfei Date: Fri Jun 7 01:53:37 2019 New Revision: 362781 URL: http://llvm.org/viewvc/llvm-project?rev=362781&view=rev Log: [X86] -march=cooperlake (clang) Support intel -march=cooperlake in clang Patch by Shengchen Kan (skan) Differential Revision: https://reviews.llvm.org/D62835 Modified: cfe/trunk/include/clang/Basic/X86Target.def cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Driver/x86-march.c cfe/trunk/test/Misc/target-invalid-cpu-note.c cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/include/clang/Basic/X86Target.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/X86Target.def?rev=362781&r1=362780&r2=362781&view=diff == --- cfe/trunk/include/clang/Basic/X86Target.def (original) +++ cfe/trunk/include/clang/Basic/X86Target.def Fri Jun 7 01:53:37 2019 @@ -157,6 +157,10 @@ PROC_ALIAS(SkylakeServer, "skx") /// Cascadelake Server microarchitecture based processors. PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) +/// \name Cooperlake Server +/// Cooperlake Server microarchitecture based processors. +PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) + /// \name Cannonlake Client /// Cannonlake client microarchitecture based processors. PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) @@ -292,6 +296,7 @@ FEATURE(FEATURE_GFNI) FEATURE(FEATURE_VPCLMULQDQ) FEATURE(FEATURE_AVX512VNNI) FEATURE(FEATURE_AVX512BITALG) +FEATURE(FEATURE_AVX512BF16) // FIXME: When commented out features are supported in LLVM, enable them here. 
Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=362781&r1=362780&r2=362781&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Fri Jun 7 01:53:37 2019 @@ -156,6 +156,13 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512vbmi", true); setFeatureEnabledImpl(Features, "sha", true); LLVM_FALLTHROUGH; + case CK_Cooperlake: +// Cannonlake, IcelakeClient and IcelakeServer have no AVX512BF16 feature +if (Kind != CK_Cannonlake && Kind != CK_IcelakeClient && +Kind != CK_IcelakeServer) + // CPX inherits all CLX features plus AVX512BF16 + setFeatureEnabledImpl(Features, "avx512bf16", true); +LLVM_FALLTHROUGH; case CK_Cascadelake: //Cannonlake has no VNNI feature inside while Icelake has if (Kind != CK_Cannonlake) @@ -176,9 +183,9 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "xsavec", true); setFeatureEnabledImpl(Features, "xsaves", true); setFeatureEnabledImpl(Features, "mpx", true); -if (Kind != CK_SkylakeServer -&& Kind != CK_Cascadelake) - // SKX/CLX inherits all SKL features, except SGX +if (Kind != CK_SkylakeServer && Kind != CK_Cascadelake && +Kind != CK_Cooperlake) + // SKX/CLX/CPX inherits all SKL features, except SGX setFeatureEnabledImpl(Features, "sgx", true); setFeatureEnabledImpl(Features, "clflushopt", true); setFeatureEnabledImpl(Features, "aes", true); @@ -981,6 +988,7 @@ void X86TargetInfo::getTargetDefines(con case CK_SkylakeClient: case CK_SkylakeServer: case CK_Cascadelake: + case CK_Cooperlake: case CK_Cannonlake: case CK_IcelakeClient: case CK_IcelakeServer: Modified: cfe/trunk/test/Driver/x86-march.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/x86-march.c?rev=362781&r1=362780&r2=362781&view=diff == --- cfe/trunk/test/Driver/x86-march.c (original) +++ cfe/trunk/test/Driver/x86-march.c Fri Jun 7 01:53:37 2019 @@ -52,6 +52,10 @@ // RUN: | FileCheck %s 
-check-prefix=cascadelake // cascadelake: "-target-cpu" "cascadelake" // +// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=cooperlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=cooperlake +// cooperlake: "-target-cpu" "cooperlake" +// // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=knl 2>&1 \ // RUN: | FileCheck %s -check-prefix=knl // knl: "-target-cpu" "knl" Modified: cfe/trunk/test/Misc/target-invalid-cpu-note.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/target-invalid-cpu-note.c?rev=362781&r1=362780&r2=362781&view=diff == --- cfe/trunk/test/Misc/target-invalid-cpu-note.c (original) +++ cfe/trunk/test/Misc/target-invalid-cpu-note.c Fri Jun 7 01:53:37 2019 @@ -16,7 +16,7 @@ // X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, // X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridg
r363018 - [X86] Enable intrinsics that convert float and bf16 data to each other
Author: pengfei Date: Mon Jun 10 18:17:28 2019 New Revision: 363018 URL: http://llvm.org/viewvc/llvm-project?rev=363018&view=rev Log: [X86] Enable intrinsics that convert float and bf16 data to each other Scalar version : _mm_cvtsbh_ss , _mm_cvtness_sbh Vector version: _mm512_cvtpbh_ps , _mm256_cvtpbh_ps _mm512_maskz_cvtpbh_ps , _mm256_maskz_cvtpbh_ps _mm512_mask_cvtpbh_ps , _mm256_mask_cvtpbh_ps Patch by Shengchen Kan (skan) Differential Revision: https://reviews.llvm.org/D62363 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bf16intrin.h cfe/trunk/lib/Headers/avx512vlbf16intrin.h cfe/trunk/test/CodeGen/avx512bf16-builtins.c cfe/trunk/test/CodeGen/avx512vlbf16-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=363018&r1=363017&r2=363018&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Jun 10 18:17:28 2019 @@ -1831,6 +1831,8 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2ss3 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") + +// bf16 intrinsics TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:", "avx512bf16") @@ -1840,6 +1842,8 @@ TARGET_BUILTIN(__builtin_ia32_cvtneps2bf TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl") TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl") 
TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_cvtsbf162ss_32, "fUs", "nc", "avx512bf16") + TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8LLiV8LLiUc*Uc*", "nV:512:", "avx512vp2intersect") TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4LLiV4LLiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl") TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2LLiV2LLiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=363018&r1=363017&r2=363018&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun 10 18:17:28 2019 @@ -9795,6 +9795,18 @@ Value *CodeGenFunction::EmitX86CpuIs(con return EmitX86CpuIs(CPUStr); } +// Convert a BF16 to a float. +static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, +const CallExpr *E, +ArrayRef Ops) { + llvm::Type *Int32Ty = CGF.Builder.getInt32Ty(); + Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty); + Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16); + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType); + return BitCast; +} + Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { llvm::Type *Int32Ty = Builder.getInt32Ty(); @@ -11941,6 +11953,8 @@ Value *CodeGenFunction::EmitX86BuiltinEx Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } + case X86::BI__builtin_ia32_cvtsbf162ss_32: +return EmitX86CvtBF16ToFloatExpr(*this, E, Ops); case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { Modified: cfe/trunk/lib/Headers/avx512bf16intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bf16intrin.h?rev=363018&r1=363017&r2=363018&view=diff == --- 
cfe/trunk/lib/Headers/avx512bf16intrin.h (original) +++ cfe/trunk/lib/Headers/avx512bf16intrin.h Mon Jun 10 18:17:28 2019 @@ -15,10 +15,27 @@ typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); +typedef unsigned short __bfloat16; #define __DEFAULT_FN_ATTRS512 \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__,
r363116 - [X86] [ABI] Fix i386 ABI "__m64" type bug
Author: pengfei Date: Tue Jun 11 18:52:23 2019 New Revision: 363116 URL: http://llvm.org/viewvc/llvm-project?rev=363116&view=rev Log: [X86] [ABI] Fix i386 ABI "__m64" type bug According to the System V i386 ABI, __m64 type parameters and return values are passed in MMX registers. But the current implementation treats __m64 as i64, which results in parameters being passed on the stack and values being returned in EDX and EAX. This patch fixes the bug (https://bugs.llvm.org/show_bug.cgi?id=41029) for Linux and NetBSD. Patch by Wei Xiao (wxiao3) Differential Revision: https://reviews.llvm.org/D59744 Added: cfe/trunk/test/CodeGen/x86_32-m64.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp cfe/trunk/test/CodeGen/x86_32-arguments-linux.c Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=363116&r1=363115&r2=363116&view=diff == --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Jun 11 18:52:23 2019 @@ -915,14 +915,6 @@ ABIArgInfo PNaClABIInfo::classifyReturnT : ABIArgInfo::getDirect()); } -/// IsX86_MMXType - Return true if this is an MMX type. -bool IsX86_MMXType(llvm::Type *IRType) { - // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. 
- return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && -cast(IRType)->getElementType()->isIntegerTy() && -IRType->getScalarSizeInBits() != 64; -} - static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { @@ -1011,6 +1003,7 @@ class X86_32ABIInfo : public SwiftABIInf bool IsSoftFloatABI; bool IsMCUABI; unsigned DefaultNumRegisterParameters; + bool IsMMXEnabled; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); @@ -1070,13 +1063,15 @@ public: X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, -unsigned NumRegisterParameters, bool SoftFloatABI) +unsigned NumRegisterParameters, bool SoftFloatABI, +bool MMXEnabled) : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - DefaultNumRegisterParameters(NumRegisterParameters) {} + DefaultNumRegisterParameters(NumRegisterParameters), + IsMMXEnabled(MMXEnabled) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { @@ -1091,16 +1086,30 @@ public: // x86-32 lowering does not support passing swifterror in a register. return false; } + + bool isPassInMMXRegABI() const { +// The System V i386 psABI requires __m64 to be passed in MMX registers. +// Clang historically had a bug where it failed to apply this rule, and +// some platforms (e.g. Darwin, PS4, and FreeBSD) have opted to maintain +// compatibility with the old Clang behavior, so we only apply it on +// platforms that have specifically requested it (currently just Linux and +// NetBSD). 
+const llvm::Triple &T = getTarget().getTriple(); +if (IsMMXEnabled && (T.isOSLinux() || T.isOSNetBSD())) + return true; +return false; + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) + unsigned NumRegisterParameters, bool SoftFloatABI, + bool MMXEnabled = false) : TargetCodeGenInfo(new X86_32ABIInfo( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, -NumRegisterParameters, SoftFloatABI)) {} +NumRegisterParameters, SoftFloatABI, MMXEnabled)) {} static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); @@ -1386,10 +1395,9 @@ ABIArgInfo X86_32ABIInfo::classifyReturn } if (const VectorType *VT = RetTy->getAs()) { +uint64_t Size = getContext().getTypeSize(RetTy); // On Darwin, some vectors are returned in registers. if (IsDarwinVectorABI) { - uint64_t Size = getContext().getTypeSize(RetTy); - // 128-bit vectors are a special case; they are returned in // registers and we need to make sure to pick a type the LLVM // backend will like. @@ -1407,6 +1415,10 @@ ABIArgInfo X86_32AB
r368543 - [X86] Support -march=tigerlake
Author: pengfei Date: Sun Aug 11 18:29:46 2019 New Revision: 368543 URL: http://llvm.org/viewvc/llvm-project?rev=368543&view=rev Log: [X86] Support -march=tigerlake Support -march=tigerlake for x86. Compared with Icelake Client, it includes 4 more new features: avx512vp2intersect, movdiri, movdir64b and shstk. Patch by Xiang Zhang (xiangzhangllvm) Differential Revision: https://reviews.llvm.org/D65840 Modified: cfe/trunk/include/clang/Basic/X86Target.def cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Driver/x86-march.c cfe/trunk/test/Misc/target-invalid-cpu-note.c cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/include/clang/Basic/X86Target.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/X86Target.def?rev=368543&r1=368542&r2=368543&view=diff == --- cfe/trunk/include/clang/Basic/X86Target.def (original) +++ cfe/trunk/include/clang/Basic/X86Target.def Sun Aug 11 18:29:46 2019 @@ -173,6 +173,10 @@ PROC(IcelakeClient, "icelake-client", PR /// Icelake server microarchitecture based processors. PROC(IcelakeServer, "icelake-server", PROC_64_BIT) +/// \name Tigerlake Server +/// Tigerlake Server microarchitecture based processors. +PROC(Tigerlake, "tigerlake", PROC_64_BIT) + /// \name Knights Landing /// Knights Landing processor. PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) @@ -297,6 +301,7 @@ FEATURE(FEATURE_VPCLMULQDQ) FEATURE(FEATURE_AVX512VNNI) FEATURE(FEATURE_AVX512BITALG) FEATURE(FEATURE_AVX512BF16) +FEATURE(FEATURE_AVX512VP2INTERSECT) // FIXME: When commented out features are supported in LLVM, enable them here. 
Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=368543&r1=368542&r2=368543&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Sun Aug 11 18:29:46 2019 @@ -157,11 +157,20 @@ bool X86TargetInfo::initFeatureMap( // SkylakeServer cores inherits all SKL features, except SGX goto SkylakeCommon; + case CK_Tigerlake: +setFeatureEnabledImpl(Features, "avx512vp2intersect", true); +setFeatureEnabledImpl(Features, "movdiri", true); +setFeatureEnabledImpl(Features, "movdir64b", true); +setFeatureEnabledImpl(Features, "shstk", true); +// Tigerlake cores inherits IcelakeClient, except pconfig and wbnoinvd +goto IcelakeCommon; + case CK_IcelakeServer: setFeatureEnabledImpl(Features, "pconfig", true); setFeatureEnabledImpl(Features, "wbnoinvd", true); LLVM_FALLTHROUGH; case CK_IcelakeClient: +IcelakeCommon: setFeatureEnabledImpl(Features, "vaes", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); @@ -1000,6 +1009,7 @@ void X86TargetInfo::getTargetDefines(con case CK_Cannonlake: case CK_IcelakeClient: case CK_IcelakeServer: + case CK_Tigerlake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. 
Modified: cfe/trunk/test/Driver/x86-march.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/x86-march.c?rev=368543&r1=368542&r2=368543&view=diff == --- cfe/trunk/test/Driver/x86-march.c (original) +++ cfe/trunk/test/Driver/x86-march.c Sun Aug 11 18:29:46 2019 @@ -76,6 +76,10 @@ // RUN: | FileCheck %s -check-prefix=icelake-server // icelake-server: "-target-cpu" "icelake-server" // +// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=tigerlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=tigerlake +// tigerlake: "-target-cpu" "tigerlake" +// // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \ // RUN: | FileCheck %s -check-prefix=lakemont // lakemont: "-target-cpu" "lakemont" Modified: cfe/trunk/test/Misc/target-invalid-cpu-note.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/target-invalid-cpu-note.c?rev=368543&r1=368542&r2=368543&view=diff == --- cfe/trunk/test/Misc/target-invalid-cpu-note.c (original) +++ cfe/trunk/test/Misc/target-invalid-cpu-note.c Sun Aug 11 18:29:46 2019 @@ -16,7 +16,7 @@ // X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, // X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, // X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, -// X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, knl, knm, lakemont, k6, k6-2, k6-3, +// X86-SAME: skx, casc
r365473 - [NFC] [X86] Fix scan-build complaining
Author: pengfei Date: Tue Jul 9 05:41:12 2019 New Revision: 365473 URL: http://llvm.org/viewvc/llvm-project?rev=365473&view=rev Log: [NFC] [X86] Fix scan-build complaining Summary: Remove unused variable. This fixes bug: https://bugs.llvm.org/show_bug.cgi?id=42526 Signed-off-by: pengfei Reviewers: RKSimon, xiangzhangllvm, craig.topper Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64389 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=365473&r1=365472&r2=365473&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Jul 9 05:41:12 2019 @@ -11776,12 +11776,11 @@ Value *CodeGenFunction::EmitX86BuiltinEx Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); Value *Result = Builder.CreateExtractValue(Call, 0); Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); -Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]); +Builder.CreateDefaultAlignedStore(Result, Ops[2]); Result = Builder.CreateExtractValue(Call, 1); Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); -Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]); -return Store; +return Builder.CreateDefaultAlignedStore(Result, Ops[3]); } case X86::BI__builtin_ia32_vpmultishiftqb128: ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r372802 - [x86] Adding support for some missing intrinsics: _castf32_u32, _castf64_u64, _castu32_f32, _castu64_f64
Author: pengfei Date: Tue Sep 24 19:24:05 2019 New Revision: 372802 URL: http://llvm.org/viewvc/llvm-project?rev=372802&view=rev Log: [x86] Adding support for some missing intrinsics: _castf32_u32, _castf64_u64, _castu32_f32, _castu64_f64 Summary: Adding support for some missing intrinsics: _castf32_u32, _castf64_u64, _castu32_f32, _castu64_f64 Reviewers: craig.topper, LuoYuanke, RKSimon, pengfei Reviewed By: RKSimon Subscribers: llvm-commits Patch by yubing (Bing Yu) Differential Revision: https://reviews.llvm.org/D67212 Added: cfe/trunk/test/CodeGen/x86-builtins.c (with props) Modified: cfe/trunk/lib/Headers/ia32intrin.h Modified: cfe/trunk/lib/Headers/ia32intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/ia32intrin.h?rev=372802&r1=372801&r2=372802&view=diff == --- cfe/trunk/lib/Headers/ia32intrin.h (original) +++ cfe/trunk/lib/Headers/ia32intrin.h Tue Sep 24 19:24:05 2019 @@ -195,6 +195,74 @@ __writeeflags(unsigned int __f) } #endif /* !__x86_64__ */ +/** Cast a 32-bit float value to a 32-bit unsigned integer value + * + * \headerfile + * This intrinsic corresponds to the VMOVD / MOVD instruction in x86_64, + * and corresponds to the VMOVL / MOVL instruction in ia32. + * + * \param __A + * A 32-bit float value. + * \returns a 32-bit unsigned integer containing the converted value. + */ +static __inline__ unsigned int __attribute__((__always_inline__)) +_castf32_u32(float __A) { + unsigned int D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + +/** Cast a 64-bit float value to a 64-bit unsigned integer value + * + * \headerfile + * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, + * and corresponds to the VMOVL / MOVL instruction in ia32. + * + * \param __A + * A 64-bit float value. + * \returns a 64-bit unsigned integer containing the converted value. 
+ */ +static __inline__ unsigned long long __attribute__((__always_inline__)) +_castf64_u64(double __A) { + unsigned long long D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + +/** Cast a 32-bit unsigned integer value to a 32-bit float value + * + * \headerfile + * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, + * and corresponds to the FLDS instruction in ia32. + * + * \param __A + * A 32-bit unsigned integer value. + * \returns a 32-bit float value containing the converted value. + */ +static __inline__ float __attribute__((__always_inline__)) +_castu32_f32(unsigned int __A) { + float D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + +/** Cast a 64-bit unsigned integer value to a 64-bit float value + * + * \headerfile + * This intrinsic corresponds to the VMOVQ / MOVQ instruction in x86_64, + * and corresponds to the FLDL instruction in ia32. + * + * \param __A + * A 64-bit unsigned integer value. + * \returns a 64-bit float value containing the converted value. + */ +static __inline__ double __attribute__((__always_inline__)) +_castu64_f64(unsigned long long __A) { + double D; + __builtin_memcpy(&D, &__A, sizeof(__A)); + return D; +} + /** Adds the unsigned integer operand to the CRC-32C checksum of the * unsigned char operand. 
* Added: cfe/trunk/test/CodeGen/x86-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-builtins.c?rev=372802&view=auto == --- cfe/trunk/test/CodeGen/x86-builtins.c (added) +++ cfe/trunk/test/CodeGen/x86-builtins.c Tue Sep 24 19:24:05 2019 @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK-64 +// RUN: %clang_cc1 -ffreestanding %s -triple=i386-unknown-unknown -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=CHECK-32 + +#include + +unsigned int test_castf32_u32 (float __A){ + // CHECK-64-LABEL: @test_castf32_u32 + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i64 4, i1 false) + // CHECK-64: %{{.*}} = load i32, i32* %{{.*}}, align 4 + // CHECK-32-LABEL: @test_castf32_u32 + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %{{.*}}, i8* align 4 %{{.*}}, i32 4, i1 false) + // CHECK-32: %{{.*}} = load i32, i32* %{{.*}}, align 4 + return _castf32_u32(__A); +} + +unsigned long long test_castf64_u64 (double __A){ + // CHECK-64-LABEL: @test_castf64_u64 + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i64 8, i1 false) + // CHECK-64: %{{.*}} = load i64, i64* %{{.*}}, align 8 + // CHECK-32-LABEL: @test_castf64_u64 + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %{{.*}}, i8* align 8 %{{.*}}, i32 8, i1 false) + // CHECK-32: %{{.*}} = load i64, i64* %{{.*}}, align 8 + return _castf64_u64(__A); +} + +float test_cas
r370297 - [x86] Adding support for some missing intrinsics: _mm512_cvtsi512_si32
Author: pengfei Date: Wed Aug 28 23:18:34 2019 New Revision: 370297 URL: http://llvm.org/viewvc/llvm-project?rev=370297&view=rev Log: [x86] Adding support for some missing intrinsics: _mm512_cvtsi512_si32 Summary: Adding support for some missing intrinsics: _mm512_cvtsi512_si32 Reviewers: craig.topper, pengfei, LuoYuanke, spatel, RKSimon Reviewed By: craig.topper Subscribers: llvm-commits Patch by Bing Yu (yubing) Differential Revision: https://reviews.llvm.org/D66785 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=370297&r1=370296&r2=370297&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed Aug 28 23:18:34 2019 @@ -9659,6 +9659,23 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, } #undef _mm512_mask_reduce_operator +/// Moves the least significant 32 bits of a vector of [16 x i32] to a +///32-bit signed integer value. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VMOVD / MOVD instruction. +/// +/// \param __A +///A vector of [16 x i32]. The least significant 32 bits are moved to the +///destination. +/// \returns A 32-bit signed integer containing the moved value. 
+static __inline__ int __DEFAULT_FN_ATTRS512 +_mm512_cvtsi512_si32(__m512i __A) { + __v16si __b = (__v16si)__A; + return __b[0]; +} + #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=370297&r1=370296&r2=370297&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Aug 28 23:18:34 2019 @@ -4762,6 +4762,12 @@ unsigned test_mm_cvtsd_u32(__m128d __A) return _mm_cvtsd_u32(__A); } +int test_mm512_cvtsi512_si32(__m512i a) { + // CHECK-LABEL: test_mm512_cvtsi512_si32 + // CHECK: %{{.*}} = extractelement <16 x i32> %{{.*}}, i32 0 + return _mm512_cvtsi512_si32(a); +} + #ifdef __x86_64__ unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_u64 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r370691 - [x86] Fix bugs of some intrinsic functions in CLANG : _mm512_stream_ps, _mm512_stream_pd, _mm512_stream_si512
Author: pengfei Date: Mon Sep 2 19:06:15 2019 New Revision: 370691 URL: http://llvm.org/viewvc/llvm-project?rev=370691&view=rev Log: [x86] Fix bugs of some intrinsic functions in CLANG : _mm512_stream_ps, _mm512_stream_pd, _mm512_stream_si512 Reviewers: craig.topper, pengfei, LuoYuanke, RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Patch by Bing Yu (yubing) Differential Revision: https://reviews.llvm.org/D66786 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=370691&r1=370690&r2=370691&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Sep 2 19:06:15 2019 @@ -8436,7 +8436,7 @@ _store_mask16(__mmask16 *__A, __mmask16 } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_si512 (__m512i * __P, __m512i __A) +_mm512_stream_si512 (void * __P, __m512i __A) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); @@ -8450,14 +8450,14 @@ _mm512_stream_load_si512 (void const *__ } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_pd (double *__P, __m512d __A) +_mm512_stream_pd (void *__P, __m512d __A) { typedef __v8df __v8df_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); } static __inline__ void __DEFAULT_FN_ATTRS512 -_mm512_stream_ps (float *__P, __m512 __A) +_mm512_stream_ps (void *__P, __m512 __A) { typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=370691&r1=370690&r2=370691&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ 
cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Sep 2 19:06:15 2019 @@ -8575,6 +8575,12 @@ void test_mm512_stream_si512(__m512i * _ _mm512_stream_si512(__P, __A); } +void test_mm512_stream_si512_2(void * __P, __m512i __A) { + // CHECK-LABEL: @test_mm512_stream_si512 + // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal + _mm512_stream_si512(__P, __A); +} + __m512i test_mm512_stream_load_si512(void *__P) { // CHECK-LABEL: @test_mm512_stream_load_si512 // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal @@ -8593,12 +8599,23 @@ void test_mm512_stream_pd(double *__P, _ return _mm512_stream_pd(__P, __A); } +void test_mm512_stream_pd_2(void *__P, __m512d __A) { + // CHECK-LABEL: @test_mm512_stream_pd + // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal + return _mm512_stream_pd(__P, __A); +} + void test_mm512_stream_ps(float *__P, __m512 __A) { // CHECK-LABEL: @test_mm512_stream_ps // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal _mm512_stream_ps(__P, __A); } +void test_mm512_stream_ps_2(void *__P, __m512 __A) { + // CHECK-LABEL: @test_mm512_stream_ps + // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal + _mm512_stream_ps(__P, __A); +} __m512d test_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_compress_pd // CHECK: @llvm.x86.avx512.mask.compress ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] af3a7de - [X86] add mayRaiseFPException flag and FPCW registers for X87 instructions
Author: Pengfei Wang Date: 2019-11-01T21:12:43-07:00 New Revision: af3a7de20c3f92f5aee828d03049032200b21f08 URL: https://github.com/llvm/llvm-project/commit/af3a7de20c3f92f5aee828d03049032200b21f08 DIFF: https://github.com/llvm/llvm-project/commit/af3a7de20c3f92f5aee828d03049032200b21f08.diff LOG: [X86] add mayRaiseFPException flag and FPCW registers for X87 instructions Summary: This patch adds the "mayRaiseFPException" flag, FPCW and FPSW for X87 instructions which could raise floating-point exceptions. Reviewers: pengfei, RKSimon, andrew.w.kaylor, uweigand, kpn, spatel, cameron.mcinally, craig.topper Reviewed By: craig.topper Subscribers: thakis, hiraditya, llvm-commits Patch by LiuChen. Differential Revision: https://reviews.llvm.org/D68854 Added: llvm/test/CodeGen/X86/x87-reg-usage.mir Modified: clang/test/CodeGen/ms-inline-asm.c llvm/lib/Target/X86/X86InstrFPStack.td llvm/lib/Target/X86/X86InstrFormats.td Removed: diff --git a/clang/test/CodeGen/ms-inline-asm.c b/clang/test/CodeGen/ms-inline-asm.c index edcbaa8aa455..ca9b937a7cde 100644 --- a/clang/test/CodeGen/ms-inline-asm.c +++ b/clang/test/CodeGen/ms-inline-asm.c @@ -752,7 +752,7 @@ void mxcsr() { __asm fxrstor buf } // CHECK-LABEL: define void @mxcsr -// CHECK: call void asm sideeffect inteldialect "fxrstor $0", "=*m,~{dirflag},~{fpsr},~{flags}" +// CHECK: call void asm sideeffect inteldialect "fxrstor $0", "=*m,~{fpcr},~{dirflag},~{fpsr},~{flags}" // Make sure we can find the register for the dirflag for popfd void dirflag() { diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 73cffc9aee4f..1b7a2ccde51f 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -282,7 +282,7 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), !strconcat("fi", asmstring, "{l}\t$src")>; } -let Defs = [FPSW], Uses = [FPCW] in { +let Uses = [FPCW], mayRaiseFPException = 1 in { // FPBinary_rr just defines pseudo-instructions, no need to set a 
scheduling // resources. let hasNoSchedulingInfo = 1 in { @@ -307,7 +307,7 @@ let SchedRW = [WriteFDivLd] in { defm DIV : FPBinary; defm DIVR: FPBinary; } -} // Defs = [FPSW] +} // Uses = [FPCW], mayRaiseFPException = 1 class FPST0rInst : FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>; @@ -319,7 +319,7 @@ class FPrST0PInst // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion // of some of the 'reverse' forms of the fsub and fdiv instructions. As such, // we have to put some 'r's in and take them out of weird places. -let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFAdd], Uses = [FPCW], mayRaiseFPException = 1 in { def ADD_FST0r : FPST0rInst ; def ADD_FrST0 : FPrST0Inst ; def ADD_FPrST0 : FPrST0PInst; @@ -330,16 +330,16 @@ def SUB_FST0r : FPST0rInst ; def SUBR_FrST0 : FPrST0Inst ; def SUBR_FPrST0 : FPrST0PInst; } // SchedRW -let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFCom], Uses = [FPCW], mayRaiseFPException = 1 in { def COM_FST0r : FPST0rInst ; def COMP_FST0r : FPST0rInst ; } // SchedRW -let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFMul], Uses = [FPCW], mayRaiseFPException = 1 in { def MUL_FST0r : FPST0rInst ; def MUL_FrST0 : FPrST0Inst ; def MUL_FPrST0 : FPrST0PInst; } // SchedRW -let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFDiv], Uses = [FPCW], mayRaiseFPException = 1 in { def DIVR_FST0r : FPST0rInst ; def DIV_FrST0 : FPrST0Inst ; def DIV_FPrST0 : FPrST0PInst; @@ -359,13 +359,12 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW, def _F : FPI<0xD9, fp, (outs), (ins), asmstring>; } -let Defs = [FPSW], Uses = [FPCW] in { - let SchedRW = [WriteFSign] in { defm CHS : FPUnary; defm ABS : FPUnary; } +let Uses = [FPCW], mayRaiseFPException = 1 in { let SchedRW = [WriteFSqrt80] in defm SQRT: FPUnary; @@ -378,11 +377,11 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; def 
TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; } // SchedRW -} // Defs = [FPSW] +} // Uses = [FPCW], mayRaiseFPException = 1 // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFComLd], Uses = [FPCW], mayRaiseFPException = 1 in { def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; @@ -397,14 +396,21 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; } // SchedRW let SchedRW = [WriteMicrocoded] in { +let Defs = [FPSW,