[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompile in negation of select (#89698) (PR #91089)
https://github.com/nikic closed https://github.com/llvm/llvm-project/pull/91089 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Fix miscompile in negation of select (#89698) (PR #91089)
nikic wrote: Declining backport: This is an ABI-breaking change, and while it would be possible to rewrite the fix in a way that does not break ABI, I don't think the fix itself is important enough to backport (this is not a regression; the issue has existed since at least LLVM 12). https://github.com/llvm/llvm-project/pull/91089 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/91118 Backport 7963d9a2b3c20561278a85b19e156e013231342c Requested by: @phoebewang >From 8adb337ab73fc7c63e9e330a08f4525ddc84bcda Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Sun, 5 May 2024 18:40:27 +0800 Subject: [PATCH] [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) With KNL/KNC being deprecated, we don't need to care about such no VLX cases anymore. We may remove such patterns in the future. Fixes #90844 (cherry picked from commit 7963d9a2b3c20561278a85b19e156e013231342c) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++- llvm/lib/Target/X86/X86InstrAVX512.td | 42 - llvm/test/CodeGen/X86/pr90844.ll| 19 +++ 3 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr90844.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71fc6b5047eaa9..c572b27fe401e1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29841,7 +29841,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return R; // AVX512 implicitly uses modulo rotation amounts. - if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) { + if ((Subtarget.hasVLX() || + (Subtarget.hasAVX512() && Subtarget.hasEVEX512())) && + 32 <= EltSizeInBits) { // Attempt to rotate by immediate. if (IsCstSplat) { unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index bb5e22c7142793..0564f2167d8eea 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -814,7 +814,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info, // A 128-bit extract from bits [255:128] of a 512-bit vector should use a // smaller extract to enable EVEX->VEX. 
-let Predicates = [NoVLX] in { +let Predicates = [NoVLX, HasEVEX512] in { def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), (v2i64 (VEXTRACTI128rr (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), @@ -3068,7 +3068,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3099,7 +3099,7 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } -let Predicates = [HasBWI, NoVLX] in { +let Predicates = [HasBWI, NoVLX, HasEVEX512] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3493,7 +3493,7 @@ multiclass mask_move_lowering; defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; @@ -3505,7 +3505,7 @@ let Predicates = [HasAVX512, NoVLX] in { defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; } -let Predicates = [HasBWI, NoVLX] in { +let Predicates = [HasBWI, NoVLX, HasEVEX512] in { defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; @@ -4998,8 +4998,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, SchedWriteVecALU, HasAVX512, 1>, T8; -// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. -let Predicates = [HasDQI, NoVLX] in { +// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512. 
+let Predicates = [HasDQI, NoVLX, HasEVEX512] in { def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr @@ -5055,7 +5055,7 @@ multiclass avx512_min_max_lowering { sub_xmm)>; } -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; defm : avx512_min_max_lowering<"VPMINUQZ", umin>; defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; @@ -6032,7 +6032,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SchedWriteVecShift>; // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZrr @@ -6161,14 +6161,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X8
[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/91118 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)
llvmbot wrote: @RKSimon What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/91118 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes Backport 7963d9a2b3c20561278a85b19e156e013231342c Requested by: @phoebewang --- Full diff: https://github.com/llvm/llvm-project/pull/91118.diff 3 Files Affected: - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3-1) - (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+21-21) - (added) llvm/test/CodeGen/X86/pr90844.ll (+19) ``diff diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71fc6b5047eaa9..c572b27fe401e1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29841,7 +29841,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return R; // AVX512 implicitly uses modulo rotation amounts. - if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) { + if ((Subtarget.hasVLX() || + (Subtarget.hasAVX512() && Subtarget.hasEVEX512())) && + 32 <= EltSizeInBits) { // Attempt to rotate by immediate. if (IsCstSplat) { unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index bb5e22c7142793..0564f2167d8eea 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -814,7 +814,7 @@ defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info, // A 128-bit extract from bits [255:128] of a 512-bit vector should use a // smaller extract to enable EVEX->VEX. 
-let Predicates = [NoVLX] in { +let Predicates = [NoVLX, HasEVEX512] in { def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))), (v2i64 (VEXTRACTI128rr (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)), @@ -3068,7 +3068,7 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3099,7 +3099,7 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } -let Predicates = [HasBWI, NoVLX] in { +let Predicates = [HasBWI, NoVLX, HasEVEX512] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3493,7 +3493,7 @@ multiclass mask_move_lowering; defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; @@ -3505,7 +3505,7 @@ let Predicates = [HasAVX512, NoVLX] in { defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; } -let Predicates = [HasBWI, NoVLX] in { +let Predicates = [HasBWI, NoVLX, HasEVEX512] in { defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; @@ -4998,8 +4998,8 @@ defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, SchedWriteVecALU, HasAVX512, 1>, T8; -// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. -let Predicates = [HasDQI, NoVLX] in { +// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512. 
+let Predicates = [HasDQI, NoVLX, HasEVEX512] in { def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), (EXTRACT_SUBREG (VPMULLQZrr @@ -5055,7 +5055,7 @@ multiclass avx512_min_max_lowering { sub_xmm)>; } -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; defm : avx512_min_max_lowering<"VPMINUQZ", umin>; defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; @@ -6032,7 +6032,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SchedWriteVecShift>; // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. -let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), (EXTRACT_SUBREG (v8i64 (VPSRAQZrr @@ -6161,14 +6161,14 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecS defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm :
[llvm-branch-commits] [llvm] release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) (PR #91126)
llvmbot wrote: @marcauberer What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/91126 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) (PR #91126)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/91126 Backport c482fad2c1de367f8fef2b40361dec00523707f7 Requested by: @marcauberer >From aa3dd9a854c4da433c00d5b383ec1b98a7d3fe0f Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Thu, 28 Mar 2024 23:08:38 +0100 Subject: [PATCH] [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) Fixes #86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion. (cherry picked from commit c482fad2c1de367f8fef2b40361dec00523707f7) --- .../AArch64/GISel/AArch64GlobalISelUtils.cpp | 6 ++ .../CodeGen/AArch64/GlobalISel/select.mir | 20 .../AArch64/neon-compare-instructions.ll | 101 ++ 3 files changed, 127 insertions(+) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp index 92db89cc0915b8..80fe4bcb8b58f7 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC( case CmpInst::FCMP_UNE: CondCode = AArch64CC::NE; break; + case CmpInst::FCMP_TRUE: +CondCode = AArch64CC::AL; +break; + case CmpInst::FCMP_FALSE: +CondCode = AArch64CC::NV; +break; } } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir index 60cddbf794bc7a..ae78d4be0f88af 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir @@ -183,6 +183,14 @@ registers: - { id: 5, class: gpr } - { id: 6, class: gpr } - { id: 7, class: gpr } + - { id: 8, class: fpr } + - { id: 9, class: gpr } + - { id: 10, class: fpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } + - { id: 13, class: gpr } + - { id: 14, class: gpr } + - { id: 15, class: gpr } # CHECK: body: # CHECK:nofpexcept FCMPSrr %0, %0, implicit-def $nzcv @@ -209,6 +217,18 @@ 
body: | %7(s32) = G_ANYEXT %5 $w0 = COPY %7(s32) +%8(s32) = COPY $s0 +%9(s32) = G_FCMP floatpred(true), %8, %8 +%12(s8) = G_TRUNC %9(s32) +%14(s32) = G_ANYEXT %12 +$w0 = COPY %14(s32) + +%10(s64) = COPY $d0 +%11(s32) = G_FCMP floatpred(false), %10, %10 +%13(s8) = G_TRUNC %11(s32) +%15(s32) = G_ANYEXT %13 +$w0 = COPY %15(s32) + ... --- diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 765c81e26e13ca..c4c00f8e97942e 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) { ret <2 x i64> %tmp4 } +define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-SD-LABEL: fcmal2xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal2xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:movi v0.2s, #1 +; CHECK-GI-NEXT:shl v0.2s, v0.2s, #31 +; CHECK-GI-NEXT:sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <2 x float> %A, %B + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) { +; CHECK-SD-LABEL: fcmal4xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal4xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:mov w8, #1 // =0x1 +; CHECK-GI-NEXT:fmov s0, w8 +; CHECK-GI-NEXT:mov v1.16b, v0.16b +; CHECK-GI-NEXT:mov v1.h[1], v0.h[0] +; CHECK-GI-NEXT:mov v0.h[1], v0.h[0] +; CHECK-GI-NEXT:ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT:ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT:mov v1.d[1], v0.d[0] +; CHECK-GI-NEXT:shl v0.4s, v1.4s, #31 +; CHECK-GI-NEXT:sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <4 x float> %A, %B + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} +define <2 x i64> @fcmal2xdouble(<2 x 
double> %A, <2 x double> %B) { +; CHECK-SD-LABEL: fcmal2xdouble: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal2xdouble: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:adrp x8, .LCPI221_0 +; CHECK-GI-NEXT:ldr q0, [x8, :lo12:.LCPI221_0] +; CHECK-GI-NEXT:shl v0.2d, v0.2d, #63 +; CHECK-GI-NEXT:sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <2 x double> %A, %B + %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 +} + +define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-LABEL: fcmnv2xfloat: +; CHECK: // %bb.0: +;
[llvm-branch-commits] [llvm] release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) (PR #91126)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/91126 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) (PR #91126)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: None (llvmbot) Changes Backport c482fad2c1de367f8fef2b40361dec00523707f7 Requested by: @marcauberer --- Full diff: https://github.com/llvm/llvm-project/pull/91126.diff 3 Files Affected: - (modified) llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp (+6) - (modified) llvm/test/CodeGen/AArch64/GlobalISel/select.mir (+20) - (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+101) ``diff diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp index 92db89cc0915b8..80fe4bcb8b58f7 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC( case CmpInst::FCMP_UNE: CondCode = AArch64CC::NE; break; + case CmpInst::FCMP_TRUE: +CondCode = AArch64CC::AL; +break; + case CmpInst::FCMP_FALSE: +CondCode = AArch64CC::NV; +break; } } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir index 60cddbf794bc7a..ae78d4be0f88af 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir @@ -183,6 +183,14 @@ registers: - { id: 5, class: gpr } - { id: 6, class: gpr } - { id: 7, class: gpr } + - { id: 8, class: fpr } + - { id: 9, class: gpr } + - { id: 10, class: fpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } + - { id: 13, class: gpr } + - { id: 14, class: gpr } + - { id: 15, class: gpr } # CHECK: body: # CHECK:nofpexcept FCMPSrr %0, %0, implicit-def $nzcv @@ -209,6 +217,18 @@ body: | %7(s32) = G_ANYEXT %5 $w0 = COPY %7(s32) +%8(s32) = COPY $s0 +%9(s32) = G_FCMP floatpred(true), %8, %8 +%12(s8) = G_TRUNC %9(s32) +%14(s32) = G_ANYEXT %12 +$w0 = COPY %14(s32) + +%10(s64) = COPY $d0 +%11(s32) = G_FCMP floatpred(false), %10, %10 +%13(s8) = 
G_TRUNC %11(s32) +%15(s32) = G_ANYEXT %13 +$w0 = COPY %15(s32) + ... --- diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 765c81e26e13ca..c4c00f8e97942e 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) { ret <2 x i64> %tmp4 } +define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-SD-LABEL: fcmal2xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal2xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:movi v0.2s, #1 +; CHECK-GI-NEXT:shl v0.2s, v0.2s, #31 +; CHECK-GI-NEXT:sshr v0.2s, v0.2s, #31 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <2 x float> %A, %B + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) { +; CHECK-SD-LABEL: fcmal4xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal4xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:mov w8, #1 // =0x1 +; CHECK-GI-NEXT:fmov s0, w8 +; CHECK-GI-NEXT:mov v1.16b, v0.16b +; CHECK-GI-NEXT:mov v1.h[1], v0.h[0] +; CHECK-GI-NEXT:mov v0.h[1], v0.h[0] +; CHECK-GI-NEXT:ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT:ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT:mov v1.d[1], v0.d[0] +; CHECK-GI-NEXT:shl v0.4s, v1.4s, #31 +; CHECK-GI-NEXT:sshr v0.4s, v0.4s, #31 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <4 x float> %A, %B + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 +} +define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) { +; CHECK-SD-LABEL: fcmal2xdouble: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT:movi v0.2d, #0x +; CHECK-SD-NEXT:ret +; +; CHECK-GI-LABEL: fcmal2xdouble: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT:adrp x8, .LCPI221_0 +; CHECK-GI-NEXT:ldr q0, [x8, 
:lo12:.LCPI221_0] +; CHECK-GI-NEXT:shl v0.2d, v0.2d, #63 +; CHECK-GI-NEXT:sshr v0.2d, v0.2d, #63 +; CHECK-GI-NEXT:ret + %tmp3 = fcmp true <2 x double> %A, %B + %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> + ret <2 x i64> %tmp4 +} + +define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) { +; CHECK-LABEL: fcmnv2xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT:movi v0.2d, # +; CHECK-NEXT:ret + %tmp3 = fcmp false <2 x float> %A, %B + %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> + ret <2 x i32> %tmp4 +} + +define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) { +; CHECK-SD-LABEL: fcmnv4xfloat: +; C
[llvm-branch-commits] [llvm] release/18.x: [X86][EVEX512] Add `HasEVEX512` when `NoVLX` used for 512-bit patterns (#91106) (PR #91118)
https://github.com/RKSimon approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/91118 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [nfc][ThinLTO] Generate import status in per-module combined summary (PR #88024)
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/88024 >From cfb63d775d43a28b560d938346f1dd4b2dddc765 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Thu, 4 Apr 2024 11:54:17 -0700 Subject: [PATCH 1/7] function import changes --- llvm/include/llvm/IR/ModuleSummaryIndex.h | 24 .../llvm/Transforms/IPO/FunctionImport.h | 18 ++- llvm/lib/LTO/LTO.cpp | 13 +- llvm/lib/LTO/LTOBackend.cpp | 5 +- llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 9 +- llvm/lib/Transforms/IPO/FunctionImport.cpp| 130 -- llvm/tools/llvm-link/llvm-link.cpp| 2 +- 7 files changed, 146 insertions(+), 55 deletions(-) diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 286b51bda0e2c1..259fe56ce5f63e 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -296,6 +296,30 @@ template <> struct DenseMapInfo { static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); } }; +struct SummaryImportInfo { + enum class ImportType : uint8_t { +NotImported = 0, +Declaration = 1, +Definition = 2, + }; + unsigned Type : 3; + SummaryImportInfo() : Type(static_cast(ImportType::NotImported)) {} + SummaryImportInfo(ImportType Type) : Type(static_cast(Type)) {} + + // FIXME: delete the first two set* helper function. + void updateType(ImportType InputType) { +Type = std::max(Type, static_cast(InputType)); + } + + bool isDefinition() const { +return static_cast(Type) == ImportType::Definition; + } + + bool isDeclaration() const { +return static_cast(Type) == ImportType::Declaration; + } +}; + /// Summary of memprof callsite metadata. struct CallsiteInfo { // Actual callee function. 
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index c4d19e8641eca2..9adc0c31eed439 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -33,7 +33,14 @@ class FunctionImporter { public: /// Set of functions to import from a source module. Each entry is a set /// containing all the GUIDs of all functions to import for a source module. - using FunctionsToImportTy = std::unordered_set; + using FunctionsToImportTy = DenseMap; + + // FIXME: Remove this. + enum ImportStatus { +NotImported, +ImportDeclaration, +ImportDefinition, + }; /// The different reasons selectCallee will chose not to import a /// candidate. @@ -99,8 +106,10 @@ class FunctionImporter { /// index's module path string table). using ImportMapTy = DenseMap; - /// The set contains an entry for every global value the module exports. - using ExportSetTy = DenseSet; + /// The map contains an entry for every global value the module exports, the + /// key being the value info, and the value is the summary-based import info. + /// FIXME: Does this set need to be a map? + using ExportSetTy = DenseMap; /// A function of this type is used to load modules referenced by the index. using ModuleLoaderTy = @@ -211,7 +220,8 @@ void gatherImportedSummariesForModule( StringRef ModulePath, const DenseMap &ModuleToDefinedGVSummaries, const FunctionImporter::ImportMapTy &ImportList, -std::map &ModuleToSummariesForIndex); +std::map &ModuleToSummariesForIndex, +ModuleToGVSummaryPtrSet &ModuleToDecSummaries); /// Emit into \p OutputFilename the files module \p ModulePath will import from. 
std::error_code EmitImportsFiles( diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 53060df7f503e0..ace533fe28c92f 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -159,7 +159,7 @@ void llvm::computeLTOCacheKey( std::vector ExportsGUID; ExportsGUID.reserve(ExportList.size()); for (const auto &VI : ExportList) { -auto GUID = VI.getGUID(); +auto GUID = VI.first.getGUID(); ExportsGUID.push_back(GUID); } @@ -205,7 +205,7 @@ void llvm::computeLTOCacheKey( AddUint64(Entry.getFunctions().size()); for (auto &Fn : Entry.getFunctions()) - AddUint64(Fn); + AddUint64(Fn.first); } // Include the hash for the resolved ODR. @@ -277,7 +277,7 @@ void llvm::computeLTOCacheKey( for (const ImportModule &ImpM : ImportModulesVector) for (auto &ImpF : ImpM.getFunctions()) { GlobalValueSummary *S = - Index.findSummaryInModule(ImpF, ImpM.getIdentifier()); + Index.findSummaryInModule(ImpF.first, ImpM.getIdentifier()); AddUsedThings(S); // If this is an alias, we also care about any types/etc. that the aliasee // may reference. @@ -1389,15 +1389,18 @@ class lto::ThinBackendProc { llvm::StringRef ModulePath, const std::string &NewModulePath) { std::map ModuleToSummariesForIndex; +Module
[llvm-branch-commits] [llvm] [AArch64][SelectionDAG] Mask for SUBS with multiple users cannot be elided (#90911) (PR #91151)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/91151 In DAGCombiner, the `performCONDCombine` function attempts to remove AND instructions in front of SUBS (cmp) instructions for which the AND is transparent. The rules for that are correct, but it fails to take into account the case where the SUBS instruction has multiple users with different condition codes for comparison and simply removes the AND for all of them. This causes a miscompilation in the attached test case. (cherry picked from commit 72eaa0ed9934bfaa2449091bbc6e45648d1396d6) >From a46dbc61e1ceff89496284a5b614125754b218b9 Mon Sep 17 00:00:00 2001 From: Weihang Fan <134108011+weihangf-ap...@users.noreply.github.com> Date: Sun, 5 May 2024 04:01:13 -0700 Subject: [PATCH] [AArch64][SelectionDAG] Mask for SUBS with multiple users cannot be elided (#90911) In DAGCombiner, the `performCONDCombine` function attempts to remove AND instructions in front of SUBS (cmp) instructions for which the AND is transparent. The rules for that are correct, but it fails to take into account the case where the SUBS instruction has multiple users with different condition codes for comparison and simply removes the AND for all of them. This causes a miscompilation in the attached test case. 
(cherry picked from commit 72eaa0ed9934bfaa2449091bbc6e45648d1396d6) --- .../Target/AArch64/AArch64ISelLowering.cpp| 3 ++- llvm/test/CodeGen/AArch64/and-mask-removal.ll | 22 +++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 95d8ab95b2c097..bcfd0253e73c88 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22122,7 +22122,8 @@ SDValue performCONDCombine(SDNode *N, SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); - if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0)) + if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) || + !SubsNode->hasOneUse()) return SDValue(); // There is a SUBS feeding this condition. Is it fed by a mask we can diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll index 17ff0159701689..a31355549ba87c 100644 --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -526,4 +526,26 @@ define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) { ret i64 %4 } +define i64 @test_2_selects(i8 zeroext %a) { +; CHECK-LABEL: test_2_selects: +; CHECK: ; %bb.0: +; CHECK-NEXT:add w9, w0, #24 +; CHECK-NEXT:mov w8, #131 +; CHECK-NEXT:and w9, w9, #0xff +; CHECK-NEXT:cmp w9, #81 +; CHECK-NEXT:mov w9, #57 +; CHECK-NEXT:csel x8, x8, xzr, lo +; CHECK-NEXT:csel x9, xzr, x9, eq +; CHECK-NEXT:add x0, x8, x9 +; CHECK-NEXT:ret + %1 = add i8 %a, 24 + %2 = zext i8 %1 to i64 + %3 = icmp ult i8 %1, 81 + %4 = select i1 %3, i64 131, i64 0 + %5 = icmp eq i8 %1, 81 + %6 = select i1 %5, i64 0, i64 57 + %7 = add i64 %4, %6 + ret i64 %7 +} + declare i8 @llvm.usub.sat.i8(i8, i8) #0 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AArch64][SelectionDAG] Mask for SUBS with multiple users cannot be elided (#90911) (PR #91151)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/91151 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AArch64][SelectionDAG] Mask for SUBS with multiple users cannot be elided (#90911) (PR #91151)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: AtariDreams (AtariDreams) Changes In DAGCombiner, the `performCONDCombine` function attempts to remove AND instructions in front of SUBS (cmp) instructions for which the AND is transparent. The rules for that are correct, but it fails to take into account the case where the SUBS instruction has multiple users with different condition codes for comparison and simply removes the AND for all of them. This causes a miscompilation in the attached test case. (cherry picked from commit 72eaa0ed9934bfaa2449091bbc6e45648d1396d6) --- Full diff: https://github.com/llvm/llvm-project/pull/91151.diff 2 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-1) - (modified) llvm/test/CodeGen/AArch64/and-mask-removal.ll (+22) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 95d8ab95b2c097..bcfd0253e73c88 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22122,7 +22122,8 @@ SDValue performCONDCombine(SDNode *N, SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); unsigned CondOpcode = SubsNode->getOpcode(); - if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0)) + if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) || + !SubsNode->hasOneUse()) return SDValue(); // There is a SUBS feeding this condition. 
Is it fed by a mask we can diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll index 17ff0159701689..a31355549ba87c 100644 --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -526,4 +526,26 @@ define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) { ret i64 %4 } +define i64 @test_2_selects(i8 zeroext %a) { +; CHECK-LABEL: test_2_selects: +; CHECK: ; %bb.0: +; CHECK-NEXT:add w9, w0, #24 +; CHECK-NEXT:mov w8, #131 +; CHECK-NEXT:and w9, w9, #0xff +; CHECK-NEXT:cmp w9, #81 +; CHECK-NEXT:mov w9, #57 +; CHECK-NEXT:csel x8, x8, xzr, lo +; CHECK-NEXT:csel x9, xzr, x9, eq +; CHECK-NEXT:add x0, x8, x9 +; CHECK-NEXT:ret + %1 = add i8 %a, 24 + %2 = zext i8 %1 to i64 + %3 = icmp ult i8 %1, 81 + %4 = select i1 %3, i64 131, i64 0 + %5 = icmp eq i8 %1, 81 + %6 = select i1 %5, i64 0, i64 57 + %7 = add i64 %4, %6 + ret i64 %7 +} + declare i8 @llvm.usub.sat.i8(i8, i8) #0 `` https://github.com/llvm/llvm-project/pull/91151 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91161)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/91161 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91161)
llvmbot wrote: @RKSimon What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/91161 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91161)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/91161 Backport f7bfb078cf037205a812dc4eece777130cfa88f5 Requested by: @phoebewang >From 71c46700938e77c29942f4fbe14cb3c4c41412cf Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 6 May 2024 10:59:44 +0800 Subject: [PATCH] [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) AVX doesn't provide 16-bit BROADCAST instruction. Fixes #91005 (cherry picked from commit f7bfb078cf037205a812dc4eece777130cfa88f5) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/pr91005.ll| 39 + 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr91005.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71fc6b5047eaa9..2752f8a92447cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7295,7 +7295,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // With pattern matching, the VBROADCAST node may become a VMOVDDUP. 
if (ScalarSize == 32 || (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || -CVT == MVT::f16 || +(CVT == MVT::f16 && Subtarget.hasAVX2()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) diff --git a/llvm/test/CodeGen/X86/pr91005.ll b/llvm/test/CodeGen/X86/pr91005.ll new file mode 100644 index 00..97fd1ce4568826 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr91005.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s + +define void @PR91005(ptr %0) minsize { +; CHECK-LABEL: PR91005: +; CHECK: # %bb.0: +; CHECK-NEXT:xorl %eax, %eax +; CHECK-NEXT:testb %al, %al +; CHECK-NEXT:je .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT:vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744] +; CHECK-NEXT:vpcmpeqw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT:vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-NEXT:vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT:vcvtph2ps %xmm0, %xmm0 +; CHECK-NEXT:vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT:vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT:vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-NEXT:vmovd %xmm0, %eax +; CHECK-NEXT:movw %ax, (%rdi) +; CHECK-NEXT: .LBB0_2: # %common.ret +; CHECK-NEXT:retq + %2 = bitcast <2 x half> poison to <2 x i16> + %3 = icmp eq <2 x i16> %2, + br i1 poison, label %4, label %common.ret + +common.ret: ; preds = %4, %1 + ret void + +4:; preds = %1 + %5 = select <2 x i1> %3, <2 x half> , <2 x half> zeroinitializer + %6 = fmul <2 x half> %5, zeroinitializer + %7 = fsub <2 x half> %6, zeroinitializer + %8 = extractelement <2 x half> %7, i64 0 + store half %8, ptr %0, align 2 + br label %common.ret +} + +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [X86][FP16] Do not create VBROADCAST_LOAD for f16 without AVX2 (#91125) (PR #91161)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes Backport f7bfb078cf037205a812dc4eece777130cfa88f5 Requested by: @phoebewang --- Full diff: https://github.com/llvm/llvm-project/pull/91161.diff 2 Files Affected: - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+1-1) - (added) llvm/test/CodeGen/X86/pr91005.ll (+39) ``diff diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71fc6b5047eaa9..2752f8a92447cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7295,7 +7295,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || -CVT == MVT::f16 || +(CVT == MVT::f16 && Subtarget.hasAVX2()) || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2( { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast(Ld)) diff --git a/llvm/test/CodeGen/X86/pr91005.ll b/llvm/test/CodeGen/X86/pr91005.ll new file mode 100644 index 00..97fd1ce4568826 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr91005.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s + +define void @PR91005(ptr %0) minsize { +; CHECK-LABEL: PR91005: +; CHECK: # %bb.0: +; CHECK-NEXT:xorl %eax, %eax +; CHECK-NEXT:testb %al, %al +; CHECK-NEXT:je .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT:vbroadcastss {{.*#+}} xmm0 = [31744,31744,31744,31744] +; CHECK-NEXT:vpcmpeqw %xmm0, %xmm0, %xmm0 +; CHECK-NEXT:vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; CHECK-NEXT:vpand %xmm1, %xmm0, %xmm0 +; CHECK-NEXT:vcvtph2ps %xmm0, %xmm0 +; CHECK-NEXT:vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT:vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT:vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-NEXT:vmovd %xmm0, 
%eax +; CHECK-NEXT:movw %ax, (%rdi) +; CHECK-NEXT: .LBB0_2: # %common.ret +; CHECK-NEXT:retq + %2 = bitcast <2 x half> poison to <2 x i16> + %3 = icmp eq <2 x i16> %2, + br i1 poison, label %4, label %common.ret + +common.ret: ; preds = %4, %1 + ret void + +4:; preds = %1 + %5 = select <2 x i1> %3, <2 x half> , <2 x half> zeroinitializer + %6 = fmul <2 x half> %5, zeroinitializer + %7 = fsub <2 x half> %6, zeroinitializer + %8 = extractelement <2 x half> %7, i64 0 + store half %8, ptr %0, align 2 + br label %common.ret +} + +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) `` https://github.com/llvm/llvm-project/pull/91161 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AArch64][SelectionDAG] Mask for SUBS with multiple users cannot be elided (#90911) (PR #91151)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91151 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [workflows] Fix libclang-abi-tests to work with new version scheme (PR #91096)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91096 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [LV, LAA] Don't vectorize loops with load and store to invar address. (PR #91092)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91092 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [DAGCombiner] In mergeTruncStore, make sure we aren't storing shifted in bits. (#90939) (PR #91038)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [SelectionDAG] Mark frame index as "aliased" at argument copy elision (PR #91035)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x: [clang-format] Don't remove parentheses of fold expressions (#91045) (PR #91165)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/91165 Backport db0ed5533368 Requested by: @owenca >From 25c8774fdb84a439d7bc2d91bfe222b0f05c5453 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Sun, 5 May 2024 21:33:41 -0700 Subject: [PATCH] [clang-format] Don't remove parentheses of fold expressions (#91045) Fixes #90966. (cherry picked from commit db0ed5533368414b1c4e1c884eef651c66359da2) --- clang/lib/Format/UnwrappedLineParser.cpp | 7 ++- clang/unittests/Format/FormatTest.cpp| 9 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index a6eb18bb2b3227..f70affb732a0d8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2510,6 +2510,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { assert(FormatTok->is(tok::l_paren) && "'(' expected."); auto *LeftParen = FormatTok; bool SeenEqual = false; + bool MightBeFoldExpr = false; const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); nextToken(); do { @@ -2521,7 +2522,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseChildBlock(); break; case tok::r_paren: - if (!MightBeStmtExpr && !Line->InMacroBody && + if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Prev = LeftParen->Previous; const auto *Next = Tokens->peekNextToken(); @@ -2564,6 +2565,10 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseBracedList(); } break; +case tok::ellipsis: + MightBeFoldExpr = true; + nextToken(); + break; case tok::equal: SeenEqual = true; if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 88877e53d014c6..923128672c3166 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ 
b/clang/unittests/Format/FormatTest.cpp @@ -26894,8 +26894,14 @@ TEST_F(FormatTest, RemoveParentheses) { "if ((({ a; })))\n" " b;", Style); + verifyFormat("static_assert((std::is_constructible_v && ...));", + "static_assert(((std::is_constructible_v && ...)));", + Style); verifyFormat("return (0);", "return (((0)));", Style); verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style); + verifyFormat("return ((... && std::is_convertible_v));", + "return (((... && std::is_convertible_v)));", + Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; verifyFormat("#define Return0 return (0);", Style); @@ -26903,6 +26909,9 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("co_return 0;", "co_return ((0));", Style); verifyFormat("return 0;", "return (((0)));", Style); verifyFormat("return ({ 0; });", "return ((({ 0; })));", Style); + verifyFormat("return (... && std::is_convertible_v);", + "return (((... && std::is_convertible_v)));", + Style); verifyFormat("inline decltype(auto) f() {\n" " if (a) {\n" "return (a);\n" ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x: [clang-format] Don't remove parentheses of fold expressions (#91045) (PR #91165)
llvmbot wrote: @rymiel What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/91165 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AMDGPU] Fix GFX12 encoding of s_wait_event export_ready (#89622) (PR #91034)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/91034 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x: [clang-format] Don't remove parentheses of fold expressions (#91045) (PR #91165)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/91165 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x: [clang-format] Don't remove parentheses of fold expressions (#91045) (PR #91165)
llvmbot wrote: @llvm/pr-subscribers-clang-format Author: None (llvmbot) Changes Backport db0ed5533368 Requested by: @owenca --- Full diff: https://github.com/llvm/llvm-project/pull/91165.diff 2 Files Affected: - (modified) clang/lib/Format/UnwrappedLineParser.cpp (+6-1) - (modified) clang/unittests/Format/FormatTest.cpp (+9) ``diff diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index a6eb18bb2b3227..f70affb732a0d8 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2510,6 +2510,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { assert(FormatTok->is(tok::l_paren) && "'(' expected."); auto *LeftParen = FormatTok; bool SeenEqual = false; + bool MightBeFoldExpr = false; const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); nextToken(); do { @@ -2521,7 +2522,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseChildBlock(); break; case tok::r_paren: - if (!MightBeStmtExpr && !Line->InMacroBody && + if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Prev = LeftParen->Previous; const auto *Next = Tokens->peekNextToken(); @@ -2564,6 +2565,10 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { parseBracedList(); } break; +case tok::ellipsis: + MightBeFoldExpr = true; + nextToken(); + break; case tok::equal: SeenEqual = true; if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 88877e53d014c6..923128672c3166 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -26894,8 +26894,14 @@ TEST_F(FormatTest, RemoveParentheses) { "if ((({ a; })))\n" " b;", Style); + verifyFormat("static_assert((std::is_constructible_v && ...));", + "static_assert(((std::is_constructible_v && ...)));", + 
Style); verifyFormat("return (0);", "return (((0)));", Style); verifyFormat("return (({ 0; }));", "return ((({ 0; })));", Style); + verifyFormat("return ((... && std::is_convertible_v));", + "return (((... && std::is_convertible_v)));", + Style); Style.RemoveParentheses = FormatStyle::RPS_ReturnStatement; verifyFormat("#define Return0 return (0);", Style); @@ -26903,6 +26909,9 @@ TEST_F(FormatTest, RemoveParentheses) { verifyFormat("co_return 0;", "co_return ((0));", Style); verifyFormat("return 0;", "return (((0)));", Style); verifyFormat("return ({ 0; });", "return ((({ 0; })));", Style); + verifyFormat("return (... && std::is_convertible_v);", + "return (((... && std::is_convertible_v)));", + Style); verifyFormat("inline decltype(auto) f() {\n" " if (a) {\n" "return (a);\n" `` https://github.com/llvm/llvm-project/pull/91165 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x: [clang-format] Don't remove parentheses of fold expressions (#91045) (PR #91165)
https://github.com/rymiel approved this pull request. https://github.com/llvm/llvm-project/pull/91165 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [DAGCombiner] In mergeTruncStore, make sure we aren't storing shifted in bits. (#90939) (PR #91038)
topperc wrote: @AtariDreams This bug has existed since at least LLVM 10. What makes it a candidate for backporting? https://github.com/llvm/llvm-project/pull/91038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x: [AMDGPU] Fix GFX12 encoding of s_wait_event export_ready (#89622) (PR #91034)
https://github.com/jayfoad approved this pull request. https://github.com/llvm/llvm-project/pull/91034 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits