craig.topper created this revision. craig.topper added reviewers: rnk, RKSimon, spatel.
This patch attempts to redo what was tried in r278783, but was reverted. These intrinsics should be available on non-windows platforms with "xsave" feature check. But on Windows platforms they shouldn't have feature check since that's how MSVC behaves. To accomplish this I've added a MS builtin with no feature check. And a normal gcc builtin with a feature check. When _MSC_VER is not defined _xgetbv/_xsetbv will be macros pointing to the gcc builtin name. I've moved the forward declarations from intrin.h to immintrin.h to match the MSDN documentation and used that as the header file for the MS builtin. I'm not super happy with this implementation, and I'm open to suggestions for better ways to do it. https://reviews.llvm.org/D56686 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/immintrin.h lib/Headers/intrin.h lib/Headers/xsaveintrin.h test/CodeGen/builtins-x86.c test/CodeGen/x86_32-xsave.c test/CodeGen/x86_64-xsave.c test/Headers/ms-intrin.cpp
Index: test/Headers/ms-intrin.cpp =================================================================== --- test/Headers/ms-intrin.cpp +++ test/Headers/ms-intrin.cpp @@ -49,7 +49,9 @@ int info[4]; __cpuid(info, 0); __cpuidex(info, 0, 0); +#if defined(_M_X64) || defined(_M_IX86) _xgetbv(0); +#endif __halt(); __nop(); __readmsr(0); Index: test/CodeGen/x86_64-xsave.c =================================================================== --- test/CodeGen/x86_64-xsave.c +++ test/CodeGen/x86_64-xsave.c @@ -1,6 +1,9 @@ // RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE // RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE +// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV +// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV + // RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT // RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT @@ -10,9 +13,16 @@ // RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES // RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H +#include <x86intrin.h> + + void test() { - unsigned long long tmp_ULLi = 0; - void* tmp_vp = 0; + unsigned long long tmp_ULLi; + unsigned int tmp_Ui; + void* tmp_vp; + tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0; #ifdef TEST_XSAVE // XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 8 @@ -46,6 +56,18 @@ // XSAVE: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32 // XSAVE: call void @llvm.x86.xrstor64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]]) (void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xsave + (void)_xsave(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xsave64 + (void)_xsave64(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xrstor + (void)_xrstor(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xrstor64 + (void)_xrstor64(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVEOPT @@ -64,6 +86,12 @@ // XSAVEOPT: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32 // XSAVEOPT: call void @llvm.x86.xsaveopt64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]]) (void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi); + +// XSAVEOPT: call void @llvm.x86.xsaveopt + (void)_xsaveopt(tmp_vp, tmp_ULLi); + +// XSAVEOPT: call void @llvm.x86.xsaveopt64 + (void)_xsaveopt64(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVEC @@ -82,6 +110,12 @@ // XSAVEC: [[low32_2:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_2]] to i32 // XSAVEC: call void @llvm.x86.xsavec64(i8* [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]]) (void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi); + +// XSAVEC: call void @llvm.x86.xsavec + (void)_xsavec(tmp_vp, tmp_ULLi); + +// XSAVEC: call void @llvm.x86.xsavec64 + (void)_xsavec64(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVES @@ -116,5 +150,39 @@ // XSAVES: [[low32_4:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_4]] to i32 // XSAVES: call void @llvm.x86.xrstors64(i8* [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]]) (void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xsaves + (void)_xsaves(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xsaves64 + (void)_xsaves64(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xrstors + (void)_xrstors(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xrstors64 + (void)_xrstors64(tmp_vp, tmp_ULLi); +#endif + +#ifdef TEST_XGETBV +// XGETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4 +// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]]) + tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui); + +// XGETBV: call i64 @llvm.x86.xgetbv + tmp_ULLi = _xgetbv(tmp_Ui); +#endif + +#ifdef TEST_XSETBV +// XSETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4 +// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8 +// XSETBV: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32 +// XSETBV: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32 +// XSETBV: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32 +// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]]) + (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi); + + // XSETBV: call void @llvm.x86.xsetbv + (void)_xsetbv(tmp_Ui, tmp_ULLi); #endif } Index: test/CodeGen/x86_32-xsave.c =================================================================== --- test/CodeGen/x86_32-xsave.c +++ test/CodeGen/x86_32-xsave.c @@ -1,6 +1,9 @@ // RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE // RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=i686-unknown-unknown -target-feature +xsave -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVE +// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XGETBV +// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=i686-unknown-unknown -target-feature +xsave -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSETBV + // RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT // RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVEOPT @@ -10,9 +13,15 @@ // RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES // RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=i686-unknown-unknown -target-feature +xsave -target-feature +xsaves -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=XSAVES +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H +#include <x86intrin.h> + void test() { - unsigned long long tmp_ULLi = 0; - void* tmp_vp = 0; + unsigned long long tmp_ULLi; + unsigned int tmp_Ui; + void* tmp_vp; + tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0; #ifdef TEST_XSAVE // XSAVE: [[tmp_vp_1:%[0-9a-zA-Z]+]] = load i8*, i8** %tmp_vp, align 4 @@ -30,6 +39,12 @@ // XSAVE: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32 // XSAVE: call void @llvm.x86.xrstor(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]]) (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xsave + (void)_xsave(tmp_vp, tmp_ULLi); + +// XSAVE: call void @llvm.x86.xrstor + (void)_xrstor(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVEOPT @@ -40,6 +55,9 @@ // XSAVEOPT: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32 // XSAVEOPT: call void @llvm.x86.xsaveopt(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]]) (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi); + +// XSAVEOPT: call void @llvm.x86.xsaveopt + (void)_xsaveopt(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVEC @@ -50,6 +68,9 @@ // XSAVEC: [[low32_1:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_1]] to i32 // XSAVEC: call void @llvm.x86.xsavec(i8* [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]]) (void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi); + +// XSAVEC: call void @llvm.x86.xsavec + (void)_xsavec(tmp_vp, tmp_ULLi); #endif #ifdef TEST_XSAVES @@ -68,5 +89,34 @@ // XSAVES: [[low32_3:%[0-9a-zA-Z]+]] = trunc i64 [[tmp_ULLi_3]] to i32 // XSAVES: call void @llvm.x86.xrstors(i8* [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]]) (void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xsaves + (void)_xsaves(tmp_vp, tmp_ULLi); + +// XSAVES: call void @llvm.x86.xrstors + (void)_xrstors(tmp_vp, tmp_ULLi); +#endif + +#ifdef TEST_XGETBV +// XGETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4 +// XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]]) + tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui); + +// XGETBV: call i64 @llvm.x86.xgetbv + tmp_ULLi = _xgetbv(tmp_Ui); +#endif + +#ifdef TEST_XSETBV +// XSETBV: [[tmp_Ui:%[0-9a-zA-z]+]] = load i32, i32* %tmp_Ui, align 4 +// XSETBV: [[tmp_ULLi_3:%[0-9a-zA-z]+]] = load i64, i64* %tmp_ULLi, align 8 +// XSETBV: [[high64_3:%[0-9a-zA-z]+]] = lshr i64 [[tmp_ULLi_3]], 32 +// XSETBV: [[high32_3:%[0-9a-zA-z]+]] = trunc i64 [[high64_3]] to i32 +// XSETBV: [[low32_3:%[0-9a-zA-z]+]] = trunc i64 [[tmp_ULLi_3]] to i32 +// XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui]], i32 [[high32_3]], i32 [[low32_3]]) + (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi); + + // XSETBV: call void @llvm.x86.xsetbv + (void)_xsetbv(tmp_Ui, tmp_ULLi); #endif + } Index: test/CodeGen/builtins-x86.c =================================================================== --- test/CodeGen/builtins-x86.c +++ test/CodeGen/builtins-x86.c @@ -281,6 +281,8 @@ (void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi); (void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi); + tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui); + (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi); (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi); (void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi); (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi); Index: lib/Headers/xsaveintrin.h =================================================================== --- lib/Headers/xsaveintrin.h +++ lib/Headers/xsaveintrin.h @@ -28,6 +28,10 @@ #ifndef __XSAVEINTRIN_H #define __XSAVEINTRIN_H +#ifndef _MSC_VER +#define _XCR_XFEATURE_ENABLED_MASK 0 +#endif + /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave"))) @@ -41,6 +45,12 @@ __builtin_ia32_xrstor(__p, __m); } +#ifndef _MSC_VER +#define _xgetbv(A) __builtin_ia32_xgetbv((long long)(A)) + +#define _xsetbv(A, B) __builtin_ia32_xsetbv((unsigned int)(A), (unsigned long long)(B)); +#endif + #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsave64(void *__p, unsigned long long __m) { @@ -51,6 +61,7 @@ _xrstor64(void *__p, unsigned long long __m) { __builtin_ia32_xrstor64(__p, __m); } + #endif #undef __DEFAULT_FN_ATTRS Index: lib/Headers/intrin.h =================================================================== --- lib/Headers/intrin.h +++ lib/Headers/intrin.h @@ -200,10 +200,7 @@ _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); -static __inline__ #define _XCR_XFEATURE_ENABLED_MASK 0 -unsigned __int64 __cdecl _xgetbv(unsigned int); -void __cdecl _xsetbv(unsigned int, unsigned __int64); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */ #ifdef __x86_64__ @@ -539,12 +536,6 @@ __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) : "a"(__level), "c"(__ecx)); } -static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS -_xgetbv(unsigned int __xcr_no) { - unsigned int __eax, __edx; - __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no)); - return ((unsigned __int64)__edx << 32) | __eax; -} static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile ("hlt"); Index: lib/Headers/immintrin.h =================================================================== --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -378,6 +378,17 @@ #include <fxsrintrin.h> #endif +#ifdef _MSC_VER +#ifdef __cplusplus +extern "C" { +#endif +unsigned __int64 __cdecl _xgetbv(unsigned int); +void __cdecl _xsetbv(unsigned int, unsigned __int64); +#ifdef __cplusplus +} +#endif +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) #include <xsaveintrin.h> #endif Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9833,7 +9833,9 @@ case X86::BI__builtin_ia32_xsavec: case X86::BI__builtin_ia32_xsavec64: case X86::BI__builtin_ia32_xsaves: - case X86::BI__builtin_ia32_xsaves64: { + case X86::BI__builtin_ia32_xsaves64: + case X86::BI__builtin_ia32_xsetbv: + case X86::BI_xsetbv: { Intrinsic::ID ID; #define INTRINSIC_X86_XSAVE_ID(NAME) \ case X86::BI__builtin_ia32_##NAME: \ @@ -9853,6 +9855,10 @@ INTRINSIC_X86_XSAVE_ID(xsavec64); INTRINSIC_X86_XSAVE_ID(xsaves); INTRINSIC_X86_XSAVE_ID(xsaves64); + INTRINSIC_X86_XSAVE_ID(xsetbv); + case X86::BI_xsetbv: + ID = Intrinsic::x86_xsetbv; + break; } #undef INTRINSIC_X86_XSAVE_ID Value *Mhi = Builder.CreateTrunc( @@ -9862,6 +9868,9 @@ Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_xgetbv: + case X86::BI_xgetbv: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -693,6 +693,10 @@ // XSAVE TARGET_BUILTIN(__builtin_ia32_xsave, "vv*ULLi", "n", "xsave") TARGET_BUILTIN(__builtin_ia32_xrstor, "vv*ULLi", "n", "xsave") +TARGET_BUILTIN(__builtin_ia32_xgetbv, "ULLiUi", "n", "xsave") +TARGET_HEADER_BUILTIN(_xgetbv, "UWiUi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "") +TARGET_BUILTIN(__builtin_ia32_xsetbv, "vUiULLi", "n", "xsave") +TARGET_HEADER_BUILTIN(_xsetbv, "vUiUWi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "") TARGET_BUILTIN(__builtin_ia32_xsaveopt, "vv*ULLi", "n", "xsaveopt") TARGET_BUILTIN(__builtin_ia32_xrstors, "vv*ULLi", "n", "xsaves") TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*ULLi", "n", "xsavec")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits