Author: ctopper Date: Thu May 16 11:28:17 2019 New Revision: 360924 URL: http://llvm.org/viewvc/llvm-project?rev=360924&view=rev Log: [X86] Stop implicitly enabling avx512vl when avx512bf16 is enabled.
Previously we were doing this so that the 256 bit selectw builtin could be used in the implementation of the 512->256 bit conversion intrinsic. After this commit we now use a masked convert builtin that will emit the intrinsic call and the 256-bit select from custom code in CGBuiltin. Then the header only needs to call that one intrinsic. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bf16intrin.h cfe/trunk/lib/Headers/avx512vlbf16intrin.h cfe/trunk/test/Preprocessor/x86_target_features.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu May 16 11:28:17 2019 @@ -1831,24 +1831,15 @@ TARGET_BUILTIN(__builtin_ia32_cvtusi2ss3 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:", - "avx512bf16") -TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256, "V8sV8f", "ncV:256:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512, "V16sV16f", "ncV:512:", - "avx512bf16") -TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", - "avx512bf16,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", - "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_128, "V8sV4fV4f", "ncV:128:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_256, "V16sV8fV8f", "ncV:256:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtne2ps2bf16_512, "V32sV16fV16f", "ncV:512:", "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_128_mask, "V8sV4fV8sUc", "ncV:128:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_256_mask, "V8sV8fV8sUc", "ncV:256:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16sV16fV16sUs", "ncV:512:", "avx512bf16") +TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16") // generic select intrinsics TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu May 16 11:28:17 2019 @@ -661,14 +661,12 @@ void X86TargetInfo::setFeatureEnabledImp if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; if (Name == "avx512bf16" && Enabled) - Features["avx512bw"] = Features["avx512vl"] = true; + Features["avx512bw"] = true; // Also disable VBMI/VBMI2/BITALG if BWI is being disabled. if (Name == "avx512bw" && !Enabled) Features["avx512vbmi"] = Features["avx512vbmi2"] = Features["avx512bf16"] = Features["avx512bitalg"] = false; - if (Name == "avx512vl" && !Enabled) - Features["avx512bf16"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu May 16 11:28:17 2019 @@ -11885,6 +11885,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; + break; + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; + break; + } + Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); + return EmitX86Select(*this, Ops[2], Res, Ops[1]); + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); Modified: cfe/trunk/lib/Headers/avx512bf16intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bf16intrin.h?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512bf16intrin.h (original) +++ cfe/trunk/lib/Headers/avx512bf16intrin.h Thu May 16 11:28:17 2019 @@ -95,7 +95,9 @@ _mm512_maskz_cvtne2ps_pbh(__mmask32 __U, /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_cvtneps_pbh(__m512 __A) { - return (__m256bh)__builtin_ia32_cvtneps2bf16_512((__v16sf) __A); + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)_mm256_undefined_si256(), + (__mmask16)-1); } /// Convert Packed Single Data to Packed BF16 Data. @@ -114,9 +116,9 @@ _mm512_cvtneps_pbh(__m512 __A) { /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) { - return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm512_cvtneps_pbh(__A), - (__v16hi)__W); + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)__W, + (__mmask16)__U); } /// Convert Packed Single Data to Packed BF16 Data. @@ -133,9 +135,9 @@ _mm512_mask_cvtneps_pbh(__m256bh __W, __ /// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) { - return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, - (__v16hi)_mm512_cvtneps_pbh(__A), - (__v16hi)_mm256_setzero_si256()); + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)_mm256_setzero_si256(), + (__mmask16)__U); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. Modified: cfe/trunk/lib/Headers/avx512vlbf16intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbf16intrin.h?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512vlbf16intrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlbf16intrin.h Thu May 16 11:28:17 2019 @@ -220,7 +220,9 @@ _mm_maskz_cvtneps_pbh(__mmask8 __U, __m1 /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_pbh(__m256 __A) { - return (__m128bh)__builtin_ia32_cvtneps2bf16_256((__v8sf)__A); + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)_mm_undefined_si128(), + (__mmask8)-1); } /// Convert Packed Single Data to Packed BF16 Data. @@ -239,9 +241,9 @@ _mm256_cvtneps_pbh(__m256 __A) { /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) { - return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm256_cvtneps_pbh(__A), - (__v8hi)__W); + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)__W, + (__mmask8)__U); } /// Convert Packed Single Data to Packed BF16 Data. @@ -258,9 +260,9 @@ _mm256_mask_cvtneps_pbh(__m128bh __W, __ /// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) { - return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, - (__v8hi)_mm256_cvtneps_pbh(__A), - (__v8hi)_mm_setzero_si128()); + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)_mm_setzero_si128(), + (__mmask8)__U); } /// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. Modified: cfe/trunk/test/Preprocessor/x86_target_features.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/x86_target_features.c?rev=360924&r1=360923&r2=360924&view=diff ============================================================================== --- cfe/trunk/test/Preprocessor/x86_target_features.c (original) +++ cfe/trunk/test/Preprocessor/x86_target_features.c Thu May 16 11:28:17 2019 @@ -448,7 +448,7 @@ // AVX512BF16: #define __AVX512BF16__ 1 // AVX512BF16: #define __AVX512BW__ 1 -// AVX512BF16: #define __AVX512VL__ 1 +// AVX512BF16-NOT: #define __AVX512VL__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512BW %s @@ -456,5 +456,5 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bf16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BF16_NOAVX512VL %s -// AVX512BF16_NOAVX512VL-NOT: #define __AVX512BF16__ 1 +// AVX512BF16_NOAVX512VL: #define __AVX512BF16__ 1 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits