llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) <details> <summary>Changes</summary> Fixes: #<!-- -->136209 --- Patch is 158.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137450.diff 41 Files Affected: - (modified) clang/lib/Headers/avx512fp16intrin.h (+2-1) - (modified) clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c (+1-1) - (modified) clang/test/CodeGen/X86/avx512fp16-builtins.c (+15-15) - (modified) clang/test/Preprocessor/x86_target_features.c (+2-9) - (modified) llvm/lib/Target/X86/X86.td (+2-2) - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+10-6) - (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+4-4) - (modified) llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll (+2-2) - (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll (+2-2) - (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-cvt.ll (+4-2) - (modified) llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll (+13-10) - (modified) llvm/test/CodeGen/X86/avx512fp16-fmaxnum.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-fminnum.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-fp-logic.ll (+3-3) - (modified) llvm/test/CodeGen/X86/avx512fp16-frem.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll (+121-8) - (modified) llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll (+2-2) - (modified) llvm/test/CodeGen/X86/avx512fp16-mov.ll (+14-8) - (modified) llvm/test/CodeGen/X86/avx512fp16-rndscale.ll (+1-1) - (modified) llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll (+2-2) - (modified) llvm/test/CodeGen/X86/avx512fp16vl-fma-intrinsics.ll (+22-11) - (modified) llvm/test/CodeGen/X86/fp-round-with-concat-vector-undef-elem.ll (+1-1) - (modified) llvm/test/CodeGen/X86/fp16-libcalls.ll (+5-2) - (modified) llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16-fma.ll (+96-48) - (modified) llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll (+25-13) - (modified) llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl-fma.ll (+48-24) - (modified) llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll (+32-16) - (modified) llvm/test/CodeGen/X86/vec-strict-cmp-128-fp16.ll (+80-88) - (modified) llvm/test/CodeGen/X86/vec-strict-fptoint-128-fp16.ll (+14-10) - (modified) llvm/test/CodeGen/X86/vec-strict-fptoint-256-fp16.ll (+2-2) - (modified) llvm/test/CodeGen/X86/vec-strict-fptoint-512-fp16.ll (+4-2) - (modified) llvm/test/CodeGen/X86/vec-strict-inttofp-128-fp16.ll (+2-2) - (modified) llvm/test/CodeGen/X86/vec-strict-inttofp-512-fp16.ll (+2-2) - (modified) llvm/test/CodeGen/X86/vec_fabs.ll (+14-14) - (modified) llvm/test/CodeGen/X86/vec_fcopysign.ll (+2-2) - (modified) llvm/test/CodeGen/X86/vec_fneg.ll (+14-14) ``````````diff diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h index e136aa14a194c..92df320b45006 100644 --- a/clang/lib/Headers/avx512fp16intrin.h +++ b/clang/lib/Headers/avx512fp16intrin.h @@ -553,7 +553,8 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) { } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) { - return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f)); + return (__m512h)_mm512_xor_epi32((__m512i)__A, + _mm512_set1_epi32(-2147483648)); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c index 1a164ff57fda1..ffef29d17e542 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins-constrained-cmp.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512fp16 -target-feature +avx512vl -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CodeGen/X86/avx512fp16-builtins.c index a766476ca92bd..d277d053147fd 100644 --- a/clang/test/CodeGen/X86/avx512fp16-builtins.c +++ b/clang/test/CodeGen/X86/avx512fp16-builtins.c @@ -689,24 +689,24 @@ __m512h test_mm512_abs_ph(__m512h a) { __m512h test_mm512_conj_pch(__m512h __A) { // CHECK-LABEL: @test_mm512_conj_pch - // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}} - // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half> + // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half> return _mm512_conj_pch(__A); } __m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) { // CHECK-LABEL: @test_mm512_mask_conj_pch // CHECK: %{{.*}} = trunc i32 %{{.*}} to i16 - // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}} - // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half> + // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half> // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float> // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} @@ -717,12 +717,12 @@ __m512h test_mm512_mask_conj_pch(__m512h __W, __mmask32 __U, __m512h __A) { __m512h test_mm512_maskz_conj_pch(__mmask32 __U, __m512h __A) { // CHECK-LABEL: @test_mm512_maskz_conj_pch // CHECK: %{{.*}} = trunc i32 %{{.*}} to i16 - // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <32 x half> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <16 x i32> // CHECK: %{{.*}} = xor <16 x i32> %{{.*}}, %{{.*}} - // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <16 x float> - // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half> + // CHECK: %{{.*}} = bitcast <16 x i32> %{{.*}} to <8 x i64> + // CHECK: %{{.*}} = bitcast <8 x i64> %{{.*}} to <32 x half> // CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // CHECK: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} // CHECK: %{{.*}} = bitcast <16 x float> %{{.*}} to <32 x half> diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 63222a882ff53..3edc92c75303a 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -596,31 +596,24 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16 %s // AVX512FP16: #define __AVX512BW__ 1 -// AVX512FP16: #define __AVX512DQ__ 1 // AVX512FP16: #define __AVX512FP16__ 1 -// AVX512FP16: #define __AVX512VL__ 1 -// AVX512FP16: #define __EVEX256__ 1 // AVX512FP16: #define __EVEX512__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512vl -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512VL %s -// AVX512FP16NOAVX512VL-NOT: #define __AVX512FP16__ 1 -// AVX512FP16NOAVX512VL-NOT: #define __AVX512VL__ 1 -// AVX512FP16NOAVX512VL-NOT: #define __EVEX256__ 1 +// AVX512FP16NOAVX512VL: #define __AVX512FP16__ 1 // AVX512FP16NOAVX512VL: #define __EVEX512__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512BW %s // AVX512FP16NOAVX512BW-NOT: #define __AVX512BW__ 1 // AVX512FP16NOAVX512BW-NOT: #define __AVX512FP16__ 1 -// AVX512FP16NOAVX512BW: #define __EVEX256__ 1 // AVX512FP16NOAVX512BW: #define __EVEX512__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512fp16 -mno-avx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512FP16NOAVX512DQ %s // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1 -// AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1 -// AVX512FP16NOAVX512DQ: #define __EVEX256__ 1 +// AVX512FP16NOAVX512DQ: #define __AVX512FP16__ 1 // AVX512FP16NOAVX512DQ: #define __EVEX512__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 577428cad6d61..eacf9e7a7fb62 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -173,7 +173,7 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect", // currently. def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true", "Support 16-bit floating point", - [FeatureBWI, FeatureVLX, FeatureDQI]>; + [FeatureBWI]>; def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8", "HasAVXVNNIINT8", "true", "Enable AVX-VNNI-INT8", @@ -338,7 +338,7 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true", "Support AVX10.1 up to 256-bit instruction", [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI, FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG, - FeatureFP16]>; + FeatureFP16, FeatureVLX, FeatureDQI]>; def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true", "Support AVX10.1 up to 512-bit instruction", [FeatureAVX10_1, FeatureEVEX512]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0fc50dc1a87b6..9a91caaddd1c6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2024,13 +2024,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHL, MVT::v16i32, Custom); setOperationAction(ISD::FSHR, MVT::v16i32, Custom); - if (Subtarget.hasDQI()) { + if (Subtarget.hasDQI() || Subtarget.hasFP16()) for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) setOperationAction(Opc, MVT::v8i64, Custom); + + if (Subtarget.hasDQI()) setOperationAction(ISD::MUL, MVT::v8i64, Legal); - } if (Subtarget.hasCDI()) { // NonVLX sub-targets extend 128/256 vectors to use the 512 version. @@ -19850,7 +19851,7 @@ static SDValue promoteXINT_TO_FP(SDValue Op, const SDLoc &dl, DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd); } -static bool isLegalConversion(MVT VT, bool IsSigned, +static bool isLegalConversion(MVT VT, MVT FloatVT, bool IsSigned, const X86Subtarget &Subtarget) { if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned) return true; @@ -19861,6 +19862,8 @@ static bool isLegalConversion(MVT VT, bool IsSigned, if (Subtarget.useAVX512Regs()) { if (VT == MVT::v16i32) return true; + if (VT == MVT::v8i64 && FloatVT == MVT::v8f16 && Subtarget.hasFP16()) + return true; if (VT == MVT::v8i64 && Subtarget.hasDQI()) return true; } @@ -19882,7 +19885,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, if (isSoftF16(VT, Subtarget)) return promoteXINT_TO_FP(Op, dl, DAG); - else if (isLegalConversion(SrcVT, true, Subtarget)) + else if (isLegalConversion(SrcVT, VT, true, Subtarget)) return Op; if (Subtarget.isTargetWin64() && SrcVT == MVT::i128) @@ -20386,7 +20389,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (isSoftF16(DstVT, Subtarget)) return promoteXINT_TO_FP(Op, dl, DAG); - else if (isLegalConversion(SrcVT, false, Subtarget)) + else if (isLegalConversion(SrcVT, DstVT, false, Subtarget)) return Op; if (DstVT.isVector()) @@ -21409,7 +21412,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { {NVT, MVT::Other}, {Chain, Src})}); return DAG.getNode(Op.getOpcode(), dl, VT, DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src)); - } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) { + } else if (isTypeLegal(SrcVT) && + isLegalConversion(VT, SrcVT, IsSigned, Subtarget)) { return Op; } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 2ae6dd6b3d1ef..21d05ee389e64 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -135,7 +135,7 @@ constexpr FeatureBitset FeaturesSapphireRapids = FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVXVNNI | FeatureCLDEMOTE | FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR | - FeatureWAITPKG; + FeatureWAITPKG | FeatureAVX512DQ | FeatureAVX512VL; constexpr FeatureBitset FeaturesGraniteRapids = FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI; constexpr FeatureBitset FeaturesDiamondRapids = @@ -624,8 +624,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX2; -constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = - FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL; +constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW; // Key Locker Features constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2; constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL; @@ -637,7 +636,8 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2; constexpr FeatureBitset ImpliedFeaturesAVX10_1 = FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA | FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ | - FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16; + FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 | + FeatureAVX512DQ | FeatureAVX512VL; constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1 | FeatureEVEX512; constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1; diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll index 54ccc23840f99..f02d11648362c 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=f16c,fma | FileCheck %s --check-prefix=F16C ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s --check-prefix=FP16 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16 define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind { ; AVX2-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll index 7473ca9da9ff0..36b95e744ba14 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,NO-SZ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16 | FileCheck %s --check-prefixes=CHECK,HAS-SZ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ ; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set. define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll index 9afe46e9e7c63..a509503584649 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll index 1d413ad0c1065..43f30da15b20d 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: @@ -84,7 +84,7 @@ entry: define dso_local <8 x half> @test6(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 +; CHECK-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll index d6fe8232b056b..7b142ea170c22 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl --fp-contract=fast --enable-unsafe-fp-math | FileCheck %s define dso_local <32 x half> @test1(<32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll index 1318f607ea931..c306bfdd0c614 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-ph-w-vl-intrinsics.ll @@ -761,7 +761,7 @@ define <4 x half> @test_s17tofp4(<4 x i17> %arg0) { define <2 x half> @test_u33tofp2(<2 x i33> %arg0) { ; CHECK-LABEL: test_u33tofp2: ; CHECK: # %bb.0: -; CHECK-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; CHECK-NEXT: vcvtqq2ph %xmm0, %xmm0 ; CHECK-NEXT: retq %res = uitofp <2 x i33> %arg0 to <2 x half> diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll index 3040e58b37997..26abf51c76b23 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-cvt.ll @@ -82,7 +82,8 @@ define <8 x half> @f32to4f16_mask(<4 x float> %a, <8 x half> %b, i8 %mask) { ; ; X86-LABEL: f32to4f16_mask: ; X86: # %bb.0: -; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vcvtps2phx %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovaps %xmm1, %xmm0 ; X86-NEXT: retl @@ -101,7 +102,8 @@ define <8 x half> @f32to8f16_mask(<8 x float> %a, <8 x half> %b, i8 %mask) { ; ; X86-LABEL: f32to8f16_mask: ; X86: # %bb.0: -; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vcvtps2phx %ymm0, %xmm1 {%k1} ; X86-NEXT: vmovaps %xmm1, %xmm0 ; X86-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll index be0ef7ac478a3..3d4fa9e2cc6fa 100644 --- a/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll @@ -469,16 +469,17 @@ define <8 x half>@test_int_x86_avx512_mask3_vfmadd_sh(<8 x half> %x0, <8 x half> ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sh: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/137450 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits