https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/204144
>From af79ef9c7d711f0215456ced9924367c1be71acd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Tue, 16 Jun 2026 14:19:33 +0100 Subject: [PATCH 1/4] [WIP][X86] Replace X86 specific pdep/pext handling with generic PDEP/PEXT intrinsics * Remove X86ISD::PDEP/PEXT and use ISD::PDEP/PEXT instead * AutoUpgrade x86 pdep/pext intrinsics to llvm.pdep/pext generics * Move X86 DAG knownbits/demandedbits handling to generic (unchanged) * Move X86 InstCombine folds to generic (unchanged) * Updated clang builtins to emit generics * Add generic test coverage to msan + instcombine - similar to existing x86 tests --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 10 +++ clang/test/CodeGen/X86/bmi2-builtins.c | 8 +- llvm/include/llvm/IR/IntrinsicsX86.td | 12 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++ .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 ++++ .../CodeGen/SelectionDAG/TargetLowering.cpp | 28 ++++++ llvm/lib/IR/AutoUpgrade.cpp | 8 ++ llvm/lib/Target/X86/X86ISelLowering.cpp | 50 +---------- .../Target/X86/X86InstCombineIntrinsic.cpp | 88 ------------------- llvm/lib/Target/X86/X86InstrFragments.td | 4 - llvm/lib/Target/X86/X86InstrMisc.td | 54 ++---------- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 4 - .../InstCombine/InstCombineCalls.cpp | 58 ++++++++++++ .../Instrumentation/MemorySanitizer.cpp | 29 +++++- llvm/test/CodeGen/X86/bmi2.ll | 23 +++-- .../Instrumentation/MemorySanitizer/bmi.ll | 16 ++-- .../Instrumentation/MemorySanitizer/pdep.ll | 35 +++++--- .../Instrumentation/MemorySanitizer/pext.ll | 35 +++++--- llvm/test/Transforms/InstCombine/pdep.ll | 30 +++---- llvm/test/Transforms/InstCombine/pext.ll | 30 +++---- 20 files changed, 259 insertions(+), 287 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index acfeb9967cd2f..50125a71fcd5f 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -976,6 +976,16 @@ Value 
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } + case X86::BI__builtin_ia32_pdep_si: + case X86::BI__builtin_ia32_pdep_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pdep, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } + case X86::BI__builtin_ia32_pext_si: + case X86::BI__builtin_ia32_pext_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pext, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 1b2cb9048adb2..c83cc43d9fc3f 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -17,12 +17,12 @@ unsigned int test_bzhi_u32(unsigned int __X, unsigned int __Y) { } unsigned int test_pdep_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pdep.32 + // CHECK: @llvm.pdep.i32 return _pdep_u32(__X, __Y); } unsigned int test_pext_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pext.32 + // CHECK: @llvm.pext.i32 return _pext_u32(__X, __Y); } @@ -41,12 +41,12 @@ unsigned long long test_bzhi_u64(unsigned long long __X, unsigned long long __Y) } unsigned long long test_pdep_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pdep.64 + // CHECK: @llvm.pdep.i64 return _pdep_u64(__X, __Y); } unsigned long long test_pext_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pext.64 + // CHECK: @llvm.pext.i64 return _pext_u64(__X, __Y); } diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index b75a0485d6263..5c7785731111c 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2575,18 +2575,6 @@ 
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_bmi_bzhi_64 : ClangBuiltin<"__builtin_ia32_bzhi_di">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_pdep_32 : ClangBuiltin<"__builtin_ia32_pdep_si">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_bmi_pdep_64 : ClangBuiltin<"__builtin_ia32_pdep_di">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; - def int_x86_bmi_pext_32 : ClangBuiltin<"__builtin_ia32_pext_si">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_bmi_pext_64 : ClangBuiltin<"__builtin_ia32_pext_di">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5a4ae64cb98af..122b7f89c9d6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12245,12 +12245,18 @@ SDValue DAGCombiner::visitPDEP(SDNode *N) { // pdep(x, 0) -> 0 if (isNullOrNullSplat(N1)) return DAG.getConstant(0, DL, VT); + // pdep(x, -1) -> x (all positions selected, bits deposited at identity) if (isAllOnesOrAllOnesSplat(N1)) return N0; + // fold pdep(c1, c2) -> expandBits(c1, c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::PDEP, DL, VT, {N0, N1})) return C; + + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b32c16fe4300f..44120cceed2a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3952,6 +3952,24 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt 
&DemandedElts, Known.Zero.setBitsFrom(1); break; } + case ISD::PDEP: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + // Zeros are retained from the mask operand. But not ones. + Known.One.clearAllBits(); + // The result will have at least as many trailing zeros as the non-mask + // operand since bits can only map to the same or higher bit position. + Known.Zero.setLowBits(Known2.countMinTrailingZeros()); + break; + } + case ISD::PEXT: { + Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + // The result has as many leading zeros as the number of zeroes in the mask. + unsigned Count = Known.Zero.popcount(); + Known.Zero = APInt::getHighBitsSet(BitWidth, Count); + Known.One.clearAllBits(); + break; + } case ISD::CLMUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5999e7a9c9fb2..0bd636d19065f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2463,6 +2463,34 @@ bool TargetLowering::SimplifyDemandedBits( Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); break; } + case ISD::PDEP: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); + + // If the demanded bits has leading zeroes, we don't demand those from the + // mask. + if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) + return true; + + // The number of possible 1s in the mask determines the number of LSBs of + // operand 0 used. Undemanded bits from the mask don't matter so filter + // them before counting. 
+ KnownBits Known2; + uint64_t Count = (~Known.Zero & LoMask).popcount(); + APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); + if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) + return true; + + // Zeroes are retained from the mask, but not ones. + Known.One.clearAllBits(); + // The result will have at least as many trailing zeros as the non-mask + // operand since bits can only map to the same or higher bit position. + Known.Zero.setLowBits(Known2.countMinTrailingZeros()); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 814e985ebf7be..9422fc6129efd 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -533,6 +533,10 @@ static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0 Name.starts_with("vprot")); // Added in 8.0 + if (Name.consume_front("bmi.")) + return (Name.starts_with("pdep.") || // Added in 23.0 + Name.starts_with("pext.")); // Added in 23.0 + return (Name == "addcarry.u32" || // Added in 8.0 Name == "addcarry.u64" || // Added in 8.0 Name == "addcarryx.u32" || // Added in 8.0 @@ -4616,6 +4620,10 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, } else if (Name.starts_with("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. 
+ } else if (Name.starts_with("bmi.pdep.")) { + Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pdep); + } else if (Name.starts_with("bmi.pext.")) { + Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pext); } else reportFatalUsageErrorWithCI("Unexpected intrinsic", CI); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b9a65e2671aa9..1bc4bfd4251cf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39748,25 +39748,6 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.One.clearAllBits(); break; } - case X86ISD::PDEP: { - KnownBits Known2; - Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Zeros are retained from the mask operand. But not ones. - Known.One.clearAllBits(); - // The result will have at least as many trailing zeros as the non-mask - // operand since bits can only map to the same or higher bit position. - Known.Zero.setLowBits(Known2.countMinTrailingZeros()); - break; - } - case X86ISD::PEXT: { - Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - // The result has as many leading zeros as the number of zeroes in the mask. - unsigned Count = Known.Zero.popcount(); - Known.Zero = APInt::getHighBitsSet(BitWidth, Count); - Known.One.clearAllBits(); - break; - } case X86ISD::VTRUNC: case X86ISD::VTRUNCS: case X86ISD::VTRUNCUS: @@ -46015,34 +45996,6 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( break; } - case X86ISD::PDEP: { - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - - unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero(); - APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); - - // If the demanded bits has leading zeroes, we don't demand those from the - // mask. 
- if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) - return true; - - // The number of possible 1s in the mask determines the number of LSBs of - // operand 0 used. Undemanded bits from the mask don't matter so filter - // them before counting. - KnownBits Known2; - uint64_t Count = (~Known.Zero & LoMask).popcount(); - APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); - if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) - return true; - - // Zeroes are retained from the mask, but not ones. - Known.One.clearAllBits(); - // The result will have at least as many trailing zeros as the non-mask - // operand since bits can only map to the same or higher bit position. - Known.Zero.setLowBits(Known2.countMinTrailingZeros()); - return false; - } case X86ISD::VPMADD52L: case X86ISD::VPMADD52H: { KnownBits KnownOp0, KnownOp1, KnownOp2; @@ -63423,8 +63376,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG); case X86ISD::BEXTR: case X86ISD::BEXTRI: - case X86ISD::BZHI: - case X86ISD::PDEP: return combineBMI(N, DAG, DCI); + case X86ISD::BZHI: return combineBMI(N, DAG, DCI); case X86ISD::PCLMULQDQ: return combinePCLMULQDQ(N, DAG, DCI); case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI); case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI); diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 4999581489e82..ad1c171428671 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -2259,94 +2259,6 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // TODO should we convert this to an AND if the RHS is constant? 
} break; - case Intrinsic::x86_bmi_pext_32: - case Intrinsic::x86_bmi_pext_64: - if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { - if (MaskC->isNullValue()) { - return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); - } - if (MaskC->isAllOnesValue()) { - return IC.replaceInstUsesWith(II, II.getArgOperand(0)); - } - - unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply - // describes a subset of the input bits shifted to the appropriate - // position. Replace with the straight forward IR. - Value *Input = II.getArgOperand(0); - Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1)); - Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); - Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt); - return IC.replaceInstUsesWith(II, Shifted); - } - - if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { - uint64_t Src = SrcC->getZExtValue(); - uint64_t Mask = MaskC->getZExtValue(); - uint64_t Result = 0; - uint64_t BitToSet = 1; - - while (Mask) { - // Isolate lowest set bit. - uint64_t BitToTest = Mask & -Mask; - if (BitToTest & Src) - Result |= BitToSet; - - BitToSet <<= 1; - // Clear lowest set bit. - Mask &= Mask - 1; - } - - return IC.replaceInstUsesWith(II, - ConstantInt::get(II.getType(), Result)); - } - } - break; - case Intrinsic::x86_bmi_pdep_32: - case Intrinsic::x86_bmi_pdep_64: - if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { - if (MaskC->isNullValue()) { - return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); - } - if (MaskC->isAllOnesValue()) { - return IC.replaceInstUsesWith(II, II.getArgOperand(0)); - } - - unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply - // describes a subset of the input bits shifted to the appropriate - // position. 
Replace with the straight forward IR. - Value *Input = II.getArgOperand(0); - Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); - Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt); - Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)); - return IC.replaceInstUsesWith(II, Masked); - } - - if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { - uint64_t Src = SrcC->getZExtValue(); - uint64_t Mask = MaskC->getZExtValue(); - uint64_t Result = 0; - uint64_t BitToTest = 1; - - while (Mask) { - // Isolate lowest set bit. - uint64_t BitToSet = Mask & -Mask; - if (BitToTest & Src) - Result |= BitToSet; - - BitToTest <<= 1; - // Clear lowest set bit; - Mask &= Mask - 1; - } - - return IC.replaceInstUsesWith(II, - ConstantInt::get(II.getType(), Result)); - } - } - break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 9316360c5e02a..923b968382866 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -424,10 +424,6 @@ def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>; // Zero High Bits Starting with Specified Bit Position. def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; -// Parallel extract and deposit. -def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>; -def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>; - // X86-specific multiply by immediate. 
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td index 613a431fe365a..c6acaa697fdc7 100644 --- a/llvm/lib/Target/X86/X86InstrMisc.td +++ b/llvm/lib/Target/X86/X86InstrMisc.td @@ -1391,55 +1391,17 @@ multiclass PdepPext<string m, X86TypeInfo t, SDPatternOperator node, } let Predicates = [HasBMI2, NoEGPR] in { - defm PDEP32 : PdepPext<"pdep", Xi32, X86pdep>, XD, VEX; - defm PDEP64 : PdepPext<"pdep", Xi64, X86pdep>, XD, REX_W, VEX; - defm PEXT32 : PdepPext<"pext", Xi32, X86pext>, XS, VEX; - defm PEXT64 : PdepPext<"pext", Xi64, X86pext>, XS, REX_W, VEX; + defm PDEP32 : PdepPext<"pdep", Xi32, pdep>, XD, VEX; + defm PDEP64 : PdepPext<"pdep", Xi64, pdep>, XD, REX_W, VEX; + defm PEXT32 : PdepPext<"pext", Xi32, pext>, XS, VEX; + defm PEXT64 : PdepPext<"pext", Xi64, pext>, XS, REX_W, VEX; } let Predicates = [HasBMI2, HasEGPR] in { - defm PDEP32 : PdepPext<"pdep", Xi32, X86pdep, "_EVEX">, XD, EVEX; - defm PDEP64 : PdepPext<"pdep", Xi64, X86pdep, "_EVEX">, XD, REX_W, EVEX; - defm PEXT32 : PdepPext<"pext", Xi32, X86pext, "_EVEX">, XS, EVEX; - defm PEXT64 : PdepPext<"pext", Xi64, X86pext, "_EVEX">, XS, REX_W, EVEX; -} - -let Predicates = [HasBMI2, NoEGPR] in { - def : Pat<(i32 (pext GR32:$src, GR32:$mask)), - (PEXT32rr GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pext GR32:$src, (loadi32 addr:$mask))), - (PEXT32rm GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pext GR64:$src, GR64:$mask)), - (PEXT64rr GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pext GR64:$src, (loadi64 addr:$mask))), - (PEXT64rm GR64:$src, i64mem:$mask)>; - def : Pat<(i32 (pdep GR32:$src, GR32:$mask)), - (PDEP32rr GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pdep GR32:$src, (loadi32 addr:$mask))), - (PDEP32rm GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pdep GR64:$src, GR64:$mask)), - (PDEP64rr GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pdep GR64:$src, (loadi64 addr:$mask))), - (PDEP64rm GR64:$src, i64mem:$mask)>; -} - -let 
Predicates = [HasBMI2, HasEGPR] in { - def : Pat<(i32 (pext GR32:$src, GR32:$mask)), - (PEXT32rr_EVEX GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pext GR32:$src, (loadi32 addr:$mask))), - (PEXT32rm_EVEX GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pext GR64:$src, GR64:$mask)), - (PEXT64rr_EVEX GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pext GR64:$src, (loadi64 addr:$mask))), - (PEXT64rm_EVEX GR64:$src, i64mem:$mask)>; - def : Pat<(i32 (pdep GR32:$src, GR32:$mask)), - (PDEP32rr_EVEX GR32:$src, GR32:$mask)>; - def : Pat<(i32 (pdep GR32:$src, (loadi32 addr:$mask))), - (PDEP32rm_EVEX GR32:$src, i32mem:$mask)>; - def : Pat<(i64 (pdep GR64:$src, GR64:$mask)), - (PDEP64rr_EVEX GR64:$src, GR64:$mask)>; - def : Pat<(i64 (pdep GR64:$src, (loadi64 addr:$mask))), - (PDEP64rm_EVEX GR64:$src, i64mem:$mask)>; + defm PDEP32 : PdepPext<"pdep", Xi32, pdep, "_EVEX">, XD, EVEX; + defm PDEP64 : PdepPext<"pdep", Xi64, pdep, "_EVEX">, XD, REX_W, EVEX; + defm PEXT32 : PdepPext<"pext", Xi32, pext, "_EVEX">, XS, EVEX; + defm PEXT64 : PdepPext<"pext", Xi64, pext, "_EVEX">, XS, REX_W, EVEX; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 9e32ca23dafe2..a6b0db0230cf3 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1837,10 +1837,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0), X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0), - X86_INTRINSIC_DATA(bmi_pdep_32, INTR_TYPE_2OP, X86ISD::PDEP, 0), - X86_INTRINSIC_DATA(bmi_pdep_64, INTR_TYPE_2OP, X86ISD::PDEP, 0), - X86_INTRINSIC_DATA(bmi_pext_32, INTR_TYPE_2OP, X86ISD::PEXT, 0), - X86_INTRINSIC_DATA(bmi_pext_64, INTR_TYPE_2OP, X86ISD::PEXT, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, 
X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3cd7515eb7670..050a4e8cb27e4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2660,6 +2660,64 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return &CI; break; } + case Intrinsic::pdep: + if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + if (MaskC->isNullValue()) + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), 0)); + + if (MaskC->isAllOnesValue()) + return replaceInstUsesWith(*II, II->getArgOperand(0)); + + unsigned MaskIdx, MaskLen; + if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { + // any single contiguous sequence of 1s anywhere in the mask simply + // describes a subset of the input bits shifted to the appropriate + // position. Replace with the straightforward IR. + Value *Input = II->getArgOperand(0); + Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx); + Value *Shifted = Builder.CreateShl(Input, ShiftAmt); + Value *Masked = Builder.CreateAnd(Shifted, II->getArgOperand(1)); + return replaceInstUsesWith(*II, Masked); + } + + if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) { + // constant folding. 
+ APInt Result = + llvm::APIntOps::expandBits(SrcC->getValue(), MaskC->getValue()); + return replaceInstUsesWith(*II, + ConstantInt::get(II->getType(), Result)); + } + } + break; + case Intrinsic::pext: + if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + if (MaskC->isNullValue()) + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), 0)); + + if (MaskC->isAllOnesValue()) + return replaceInstUsesWith(*II, II->getArgOperand(0)); + + unsigned MaskIdx, MaskLen; + if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { + // any single contiguous sequence of 1s anywhere in the mask simply + // describes a subset of the input bits shifted to the appropriate + // position. Replace with the straightforward IR. + Value *Input = II->getArgOperand(0); + Value *Masked = Builder.CreateAnd(Input, II->getArgOperand(1)); + Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx); + Value *Shifted = Builder.CreateLShr(Masked, ShiftAmt); + return replaceInstUsesWith(*II, Shifted); + } + + if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) { + // constant folding. + APInt Result = + llvm::APIntOps::compressBits(SrcC->getValue(), MaskC->getValue()); + return replaceInstUsesWith(*II, + ConstantInt::get(II->getType(), Result)); + } + } + break; case Intrinsic::ptrmask: { unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); KnownBits Known(BitWidth); diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index bbc9f5d1b7506..f37e21f2c6dbb 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3333,6 +3333,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + // Instrument parallel bits deposit/extract (pdep/pext) intrinsics. + // All of these intrinsics are Z = I(X, Y) + // where the types of all operands and the result match. 
+ // The following instrumentation happens to work for all of them: + // Sz = I(Sx, Y) | (sext (Sy != 0)) + void handlePackedBits(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Type *ShadowTy = getShadowTy(&I); + + // If any bit of the mask operand is poisoned, then the whole thing is. + Value *SMask = getShadow(&I, 1); + SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)), + ShadowTy); + // Apply the same intrinsic to the shadow of the first operand. + Value *S = IRB.CreateIntrinsic(I.getIntrinsicID(), ShadowTy, + {getShadow(&I, 0), I.getOperand(1)}); + setShadow(&I, IRB.CreateOr(SMask, S)); + setOriginForNaryOp(I); + } + /// Instrument llvm.memmove /// /// At this point we don't know if llvm.memmove will be inlined or not. @@ -5873,6 +5893,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleFunnelShift(I); break; + case Intrinsic::pdep: + case Intrinsic::pext: + handlePackedBits(I); + break; + case Intrinsic::is_constant: // The result of llvm.is.constant() is always defined. 
setShadow(&I, getCleanShadow(&I)); @@ -6503,10 +6528,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case Intrinsic::x86_bmi_bextr_64: case Intrinsic::x86_bmi_bzhi_32: case Intrinsic::x86_bmi_bzhi_64: - case Intrinsic::x86_bmi_pdep_32: - case Intrinsic::x86_bmi_pdep_64: - case Intrinsic::x86_bmi_pext_32: - case Intrinsic::x86_bmi_pext_64: handleBmiIntrinsic(I); break; diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll index cabeebb0c3f36..41585bde9a696 100644 --- a/llvm/test/CodeGen/X86/bmi2.ll +++ b/llvm/test/CodeGen/X86/bmi2.ll @@ -128,7 +128,7 @@ define i32 @pdep32_load(i32 %x, ptr %y) { define i32 @pdep32_anyext(i16 %x) { ; X86-LABEL: pdep32_anyext: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl $-1431655766, %ecx # imm = 0xAAAAAAAA ; X86-NEXT: pdepl %ecx, %eax, %eax ; X86-NEXT: retl @@ -178,7 +178,7 @@ define i32 @pdep32_demandedbits(i32 %x) { define i32 @pdep32_demandedbits2(i32 %x, i32 %y) { ; X86-LABEL: pdep32_demandedbits2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax ; X86-NEXT: andl $128, %eax ; X86-NEXT: retl @@ -203,9 +203,8 @@ define i32 @pdep32_demandedbits2(i32 %x, i32 %y) { define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) { ; X86-LABEL: pdep32_demandedbits_mask: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: pdepl %eax, %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax ; X86-NEXT: andl $32768, %eax # imm = 0x8000 ; X86-NEXT: retl ; @@ -230,9 +229,8 @@ define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) { define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) { ; X86-LABEL: pdep32_demandedbits_mask2: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; 
X86-NEXT: pdepl %eax, %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: retl ; @@ -285,22 +283,23 @@ define i32 @pdep32_knownbits(i32 %x) { define i32 @pdep32_knownbits2(i32 %x, i32 %y) { ; X86-LABEL: pdep32_knownbits2: ; X86: # %bb.0: -; X86-NEXT: movl $-256, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $8, %eax ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax ; X86-NEXT: imull %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: pdep32_knownbits2: ; X64: # %bb.0: -; X64-NEXT: andl $-256, %edi +; X64-NEXT: andl $16776960, %edi # imm = 0xFFFF00 ; X64-NEXT: pdepl %esi, %edi, %eax ; X64-NEXT: imull %eax, %eax ; X64-NEXT: retq ; ; EGPR-LABEL: pdep32_knownbits2: ; EGPR: # %bb.0: -; EGPR-NEXT: andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff] +; EGPR-NEXT: andl $16776960, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0x00] +; EGPR-NEXT: # imm = 0xFFFF00 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6] ; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0] ; EGPR-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/Instrumentation/MemorySanitizer/bmi.ll b/llvm/test/Instrumentation/MemorySanitizer/bmi.ll index 46bec2956c73c..208546ec56246 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/bmi.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/bmi.ll @@ -110,9 +110,9 @@ define i32 @Test_pdep_32(i32 %a, i32 %b) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.x86.bmi.pdep.32(i32 [[TMP1]], i32 [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.pdep.i32(i32 [[TMP1]], i32 [[B]]) ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[C:%.*]] = tail call i32 @llvm.x86.bmi.pdep.32(i32 [[A]], i32 
[[B]]) +; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.pdep.i32(i32 [[A]], i32 [[B]]) ; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[C]] ; @@ -131,9 +131,9 @@ define i64 @Test_pdep_64(i64 %a, i64 %b) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.x86.bmi.pdep.64(i64 [[TMP1]], i64 [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.pdep.i64(i64 [[TMP1]], i64 [[B]]) ; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[C:%.*]] = tail call i64 @llvm.x86.bmi.pdep.64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: [[C:%.*]] = call i64 @llvm.pdep.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[C]] ; @@ -152,9 +152,9 @@ define i32 @Test_pext_32(i32 %a, i32 %b) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.x86.bmi.pext.32(i32 [[TMP1]], i32 [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.pext.i32(i32 [[TMP1]], i32 [[B]]) ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[C:%.*]] = tail call i32 @llvm.x86.bmi.pext.32(i32 [[A]], i32 [[B]]) +; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.pext.i32(i32 [[A]], i32 [[B]]) ; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[C]] ; @@ -173,9 +173,9 @@ define i64 @Test_pext_64(i64 %a, i64 %b) sanitize_memory { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP0]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.x86.bmi.pext.64(i64 [[TMP1]], i64 [[B]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.pext.i64(i64 [[TMP1]], i64 [[B]]) ; CHECK-NEXT: [[TMP5:%.*]] = 
or i64 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[C:%.*]] = tail call i64 @llvm.x86.bmi.pext.64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: [[C:%.*]] = call i64 @llvm.pext.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[C]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/pdep.ll b/llvm/test/Instrumentation/MemorySanitizer/pdep.ll index 5a94f6abfa773..f323f386d0f50 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/pdep.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/pdep.ll @@ -7,10 +7,13 @@ target triple = "x86_64-unknown-linux-gnu" define i8 @Test_pdep_8(i8 %a, i8 %b) sanitize_memory { ; CHECK-LABEL: define i8 @Test_pdep_8( ; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.pdep.i8(i8 [[TMP2]], i8 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i8 @llvm.pdep.i8(i8 [[A]], i8 [[B]]) ; CHECK-NEXT: store i8 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[C]] @@ -23,10 +26,13 @@ define i8 @Test_pdep_8(i8 %a, i8 %b) sanitize_memory { define i16 @Test_pdep_16(i16 %a, i16 %b) sanitize_memory { ; CHECK-LABEL: define i16 @Test_pdep_16( ; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call 
void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.pdep.i16(i16 [[TMP2]], i16 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i16 @llvm.pdep.i16(i16 [[A]], i16 [[B]]) ; CHECK-NEXT: store i16 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[C]] @@ -39,10 +45,13 @@ define i16 @Test_pdep_16(i16 %a, i16 %b) sanitize_memory { define i32 @Test_pdep_32(i32 %a, i32 %b) sanitize_memory { ; CHECK-LABEL: define i32 @Test_pdep_32( ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.pdep.i32(i32 [[TMP2]], i32 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i32 @llvm.pdep.i32(i32 [[A]], i32 [[B]]) ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[C]] @@ -55,10 +64,13 @@ define i32 @Test_pdep_32(i32 %a, i32 %b) sanitize_memory { define i64 @Test_pdep_64(i64 %a, i64 %b) sanitize_memory { ; CHECK-LABEL: define i64 @Test_pdep_64( ; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void 
@llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.pdep.i64(i64 [[TMP2]], i64 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i64 @llvm.pdep.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[C]] @@ -70,10 +82,13 @@ define i64 @Test_pdep_64(i64 %a, i64 %b) sanitize_memory { define i128 @Test_pdep_128(i128 %a, i128 %b) sanitize_memory { ; CHECK-LABEL: define i128 @Test_pdep_128( ; CHECK-SAME: i128 [[A:%.*]], i128 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr getelementptr (i8, ptr @__msan_param_tls, i64 16), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = call i128 @llvm.pdep.i128(i128 [[TMP2]], i128 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i128 @llvm.pdep.i128(i128 [[A]], i128 [[B]]) ; CHECK-NEXT: store i128 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i128 [[C]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/pext.ll b/llvm/test/Instrumentation/MemorySanitizer/pext.ll index 72c4834998446..2caf6a47ac93b 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/pext.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/pext.ll @@ -7,10 +7,13 @@ target triple = "x86_64-unknown-linux-gnu" define i8 @Test_pext_8(i8 %a, i8 %b) sanitize_memory { ; CHECK-LABEL: define i8 @Test_pext_8( ; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; 
CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i8 +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.pext.i8(i8 [[TMP2]], i8 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i8 @llvm.pext.i8(i8 [[A]], i8 [[B]]) ; CHECK-NEXT: store i8 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i8 [[C]] @@ -23,10 +26,13 @@ define i8 @Test_pext_8(i8 %a, i8 %b) sanitize_memory { define i16 @Test_pext_16(i16 %a, i16 %b) sanitize_memory { ; CHECK-LABEL: define i16 @Test_pext_16( ; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i16 +; CHECK-NEXT: [[TMP5:%.*]] = call i16 @llvm.pext.i16(i16 [[TMP2]], i16 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i16 @llvm.pext.i16(i16 [[A]], i16 [[B]]) ; CHECK-NEXT: store i16 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i16 [[C]] @@ -39,10 +45,13 @@ define i16 @Test_pext_16(i16 %a, i16 %b) sanitize_memory { define i32 @Test_pext_32(i32 %a, i32 %b) sanitize_memory { ; CHECK-LABEL: define i32 @Test_pext_32( ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i32, 
ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.pext.i32(i32 [[TMP2]], i32 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i32 @llvm.pext.i32(i32 [[A]], i32 [[B]]) ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i32 [[C]] @@ -55,10 +64,13 @@ define i32 @Test_pext_32(i32 %a, i32 %b) sanitize_memory { define i64 @Test_pext_64(i64 %a, i64 %b) sanitize_memory { ; CHECK-LABEL: define i64 @Test_pext_64( ; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr (i8, ptr @__msan_param_tls, i64 8), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.pext.i64(i64 [[TMP2]], i64 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i64 @llvm.pext.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i64 [[C]] @@ -70,10 +82,13 @@ define i64 @Test_pext_64(i64 %a, i64 %b) sanitize_memory { define i128 @Test_pext_128(i128 %a, i128 %b) sanitize_memory { ; CHECK-LABEL: define i128 @Test_pext_128( ; CHECK-SAME: i128 [[A:%.*]], i128 [[B:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr 
@__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr getelementptr (i8, ptr @__msan_param_tls, i64 16), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP7]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = call i128 @llvm.pext.i128(i128 [[TMP2]], i128 [[B]]) +; CHECK-NEXT: [[TMP6:%.*]] = or i128 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[C:%.*]] = tail call i128 @llvm.pext.i128(i128 [[A]], i128 [[B]]) ; CHECK-NEXT: store i128 [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret i128 [[C]] diff --git a/llvm/test/Transforms/InstCombine/pdep.ll b/llvm/test/Transforms/InstCombine/pdep.ll index ceb4d1f97b6b0..b726e87a6168c 100644 --- a/llvm/test/Transforms/InstCombine/pdep.ll +++ b/llvm/test/Transforms/InstCombine/pdep.ll @@ -3,8 +3,7 @@ define i32 @test_pdep_32_zero_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_32_zero_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pdep.i32(i32 [[X:%.*]], i32 0) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 0 ; %1 = tail call i32 @llvm.pdep.i32(i32 %x, i32 0) ret i32 %1 @@ -12,8 +11,7 @@ define i32 @test_pdep_32_zero_mask(i32 %x) nounwind readnone { define i64 @test_pdep_64_zero_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_64_zero_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pdep.i64(i64 [[X:%.*]], i64 0) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 0 ; %1 = tail call i64 @llvm.pdep.i64(i64 %x, i64 0) ret i64 %1 @@ -21,8 +19,7 @@ define i64 @test_pdep_64_zero_mask(i64 %x) nounwind readnone { define i32 @test_pdep_32_allones_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_32_allones_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pdep.i32(i32 [[X:%.*]], i32 -1) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 
[[TMP1:%.*]] ; %1 = tail call i32 @llvm.pdep.i32(i32 %x, i32 -1) ret i32 %1 @@ -30,8 +27,7 @@ define i32 @test_pdep_32_allones_mask(i32 %x) nounwind readnone { define i64 @test_pdep_64_allones_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_64_allones_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pdep.i64(i64 [[X:%.*]], i64 -1) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP1:%.*]] ; %1 = tail call i64 @llvm.pdep.i64(i64 %x, i64 -1) ret i64 %1 @@ -39,7 +35,8 @@ define i64 @test_pdep_64_allones_mask(i64 %x) nounwind readnone { define i32 @test_pdep_32_shifted_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_32_shifted_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pdep.i32(i32 [[X:%.*]], i32 12) +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[X:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP2]], 12 ; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = tail call i32 @llvm.pdep.i32(i32 %x, i32 12) @@ -48,7 +45,8 @@ define i32 @test_pdep_32_shifted_mask(i32 %x) nounwind readnone { define i64 @test_pdep_64_shifted_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pdep_64_shifted_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pdep.i64(i64 [[X:%.*]], i64 12) +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[X:%.*]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP2]], 12 ; CHECK-NEXT: ret i64 [[TMP1]] ; %1 = tail call i64 @llvm.pdep.i64(i64 %x, i64 12) @@ -57,8 +55,7 @@ define i64 @test_pdep_64_shifted_mask(i64 %x) nounwind readnone { define i32 @test_pdep_32_constant_fold() nounwind readnone { ; CHECK-LABEL: @test_pdep_32_constant_fold( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pdep.i32(i32 1985229328, i32 -252645136) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 807407616 ; %1 = tail call i32 @llvm.pdep.i32(i32 1985229328, i32 4042322160) ret i32 %1 @@ -66,8 +63,7 @@ define i32 @test_pdep_32_constant_fold() nounwind readnone { define i64 @test_pdep_64_constant_fold() nounwind readnone { ; CHECK-LABEL: 
@test_pdep_64_constant_fold( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pdep.i64(i64 8526495043095935640, i64 -1085102592571150096) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 -1089641583808049024 ; %1 = tail call i64 @llvm.pdep.i64(i64 8526495043095935640, i64 -1085102592571150096) ret i64 %1 @@ -75,8 +71,7 @@ define i64 @test_pdep_64_constant_fold() nounwind readnone { define i32 @test_pdep_32_constant_fold_2() nounwind readnone { ; CHECK-LABEL: @test_pdep_32_constant_fold_2( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pdep.i32(i32 1985229328, i32 -16776961) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 838860816 ; %1 = tail call i32 @llvm.pdep.i32(i32 1985229328, i32 4278190335) ret i32 %1 @@ -84,8 +79,7 @@ define i32 @test_pdep_32_constant_fold_2() nounwind readnone { define i64 @test_pdep_64_constant_fold_2() nounwind readnone { ; CHECK-LABEL: @test_pdep_64_constant_fold_2( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pdep.i64(i64 8526495043095935640, i64 -72056498804490496) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 -144114243170822144 ; %1 = tail call i64 @llvm.pdep.i64(i64 8526495043095935640, i64 -72056498804490496) ret i64 %1 diff --git a/llvm/test/Transforms/InstCombine/pext.ll b/llvm/test/Transforms/InstCombine/pext.ll index 52baa9a171c62..0f13f3f542023 100644 --- a/llvm/test/Transforms/InstCombine/pext.ll +++ b/llvm/test/Transforms/InstCombine/pext.ll @@ -3,8 +3,7 @@ define i32 @test_pext_32_zero_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_32_zero_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pext.i32(i32 [[X:%.*]], i32 0) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 0 ; %1 = tail call i32 @llvm.pext.i32(i32 %x, i32 0) ret i32 %1 @@ -12,8 +11,7 @@ define i32 @test_pext_32_zero_mask(i32 %x) nounwind readnone { define i64 @test_pext_64_zero_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_64_zero_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 
@llvm.pext.i64(i64 [[X:%.*]], i64 0) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 0 ; %1 = tail call i64 @llvm.pext.i64(i64 %x, i64 0) ret i64 %1 @@ -21,8 +19,7 @@ define i64 @test_pext_64_zero_mask(i64 %x) nounwind readnone { define i32 @test_pext_32_allones_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_32_allones_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pext.i32(i32 [[X:%.*]], i32 -1) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP1:%.*]] ; %1 = tail call i32 @llvm.pext.i32(i32 %x, i32 -1) ret i32 %1 @@ -30,8 +27,7 @@ define i32 @test_pext_32_allones_mask(i32 %x) nounwind readnone { define i64 @test_pext_64_allones_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_64_allones_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pext.i64(i64 [[X:%.*]], i64 -1) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP1:%.*]] ; %1 = tail call i64 @llvm.pext.i64(i64 %x, i64 -1) ret i64 %1 @@ -39,7 +35,8 @@ define i64 @test_pext_64_allones_mask(i64 %x) nounwind readnone { define i32 @test_pext_32_shifted_mask(i32 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_32_shifted_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pext.i32(i32 [[X:%.*]], i32 6) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP2]], 3 ; CHECK-NEXT: ret i32 [[TMP1]] ; %1 = tail call i32 @llvm.pext.i32(i32 %x, i32 6) @@ -48,7 +45,8 @@ define i32 @test_pext_32_shifted_mask(i32 %x) nounwind readnone { define i64 @test_pext_64_shifted_mask(i64 %x) nounwind readnone { ; CHECK-LABEL: @test_pext_64_shifted_mask( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pext.i64(i64 [[X:%.*]], i64 6) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP2]], 3 ; CHECK-NEXT: ret i64 [[TMP1]] ; %1 = tail call i64 @llvm.pext.i64(i64 %x, i64 6) @@ -58,8 +56,7 @@ define i64 @test_pext_64_shifted_mask(i64 %x) nounwind readnone { define i32 
@test_pext_32_constant_fold() nounwind readnone { ; CHECK-LABEL: @test_pext_32_constant_fold( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pext.i32(i32 1985229328, i32 -252645136) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 30001 ; %1 = tail call i32 @llvm.pext.i32(i32 1985229328, i32 4042322160) ret i32 %1 @@ -67,8 +64,7 @@ define i32 @test_pext_32_constant_fold() nounwind readnone { define i64 @test_pext_64_constant_fold() nounwind readnone { ; CHECK-LABEL: @test_pext_64_constant_fold( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pext.i64(i64 8526495043095935640, i64 -1085102592571150096) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 1966210489 ; %1 = tail call i64 @llvm.pext.i64(i64 8526495043095935640, i64 -1085102592571150096) ret i64 %1 @@ -76,8 +72,7 @@ define i64 @test_pext_64_constant_fold() nounwind readnone { define i32 @test_pext_32_constant_fold_2() nounwind readnone { ; CHECK-LABEL: @test_pext_32_constant_fold_2( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.pext.i32(i32 1985229328, i32 -16776961) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: ret i32 30224 ; %1 = tail call i32 @llvm.pext.i32(i32 1985229328, i32 4278190335) ret i32 %1 @@ -85,8 +80,7 @@ define i32 @test_pext_32_constant_fold_2() nounwind readnone { define i64 @test_pext_64_constant_fold_2() nounwind readnone { ; CHECK-LABEL: @test_pext_64_constant_fold_2( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.pext.i64(i64 8526495043095935640, i64 -72056498804490496) -; CHECK-NEXT: ret i64 [[TMP1]] +; CHECK-NEXT: ret i64 1980816570 ; %1 = tail call i64 @llvm.pext.i64(i64 8526495043095935640, i64 -72056498804490496) ret i64 %1 >From 5049452103c09e5f800ceb2f08e90e3c13d00e3e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Thu, 18 Jun 2026 12:10:39 +0100 Subject: [PATCH 2/4] Use InstSimplify --- llvm/lib/Analysis/InstructionSimplify.cpp | 14 ++++++++++++++ .../Transforms/InstCombine/InstCombineCalls.cpp | 12 ------------ 2 files changed, 
14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 7698d0d772a94..3b20592bcaed2 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6930,6 +6930,20 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, return Constant::getNullValue(ReturnType); break; } + case Intrinsic::pdep: { + if (match(Op1, m_Zero())) + return Constant::getNullValue(ReturnType); + if (match(Op1, m_AllOnes())) + return Op0; + break; + } + case Intrinsic::pext: { + if (match(Op1, m_Zero())) + return Constant::getNullValue(ReturnType); + if (match(Op1, m_AllOnes())) + return Op0; + break; + } case Intrinsic::ptrmask: { // NOTE: We can't apply this simplifications based on the value of Op1 // because we need to preserve provenance. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 050a4e8cb27e4..980fd6391bbd1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2662,12 +2662,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } case Intrinsic::pdep: if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - if (MaskC->isNullValue()) - return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), 0)); - - if (MaskC->isAllOnesValue()) - return replaceInstUsesWith(*II, II->getArgOperand(0)); - unsigned MaskIdx, MaskLen; if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { // any single contingous sequence of 1s anywhere in the mask simply @@ -2691,12 +2685,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; case Intrinsic::pext: if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - if (MaskC->isNullValue()) - return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), 0)); - - if (MaskC->isAllOnesValue()) - return 
replaceInstUsesWith(*II, II->getArgOperand(0)); - unsigned MaskIdx, MaskLen; if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { // any single contingous sequence of 1s anywhere in the mask simply >From 40076a7e0f3c2dc863d3e5d94b42c10c367ecf98 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Thu, 18 Jun 2026 12:21:50 +0100 Subject: [PATCH 3/4] Constant folding --- llvm/lib/Analysis/ConstantFolding.cpp | 10 ++++++++++ .../Transforms/InstCombine/InstCombineCalls.cpp | 16 ---------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 3fe78d6c4322d..f18b7a0b66a21 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1756,6 +1756,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::fshl: case Intrinsic::fshr: case Intrinsic::clmul: + case Intrinsic::pdep: + case Intrinsic::pext: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::masked_load: @@ -3904,6 +3906,14 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, if (!C0 || !C1) return Constant::getNullValue(Ty); return ConstantInt::get(Ty, APIntOps::clmul(*C0, *C1)); + case Intrinsic::pdep: + if (!C0 || !C1) + return Constant::getNullValue(Ty); + return ConstantInt::get(Ty, APIntOps::expandBits(*C0, *C1)); + case Intrinsic::pext: + if (!C0 || !C1) + return Constant::getNullValue(Ty); + return ConstantInt::get(Ty, APIntOps::compressBits(*C0, *C1)); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: case Intrinsic::amdgcn_wave_reduce_max: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 980fd6391bbd1..9e36aaaf8b508 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2673,14 +2673,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Value *Masked = Builder.CreateAnd(Shifted, II->getArgOperand(1)); return replaceInstUsesWith(*II, Masked); } - - if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) { - // constant folding. - APInt Result = - llvm::APIntOps::expandBits(SrcC->getValue(), MaskC->getValue()); - return replaceInstUsesWith(*II, - ConstantInt::get(II->getType(), Result)); - } } break; case Intrinsic::pext: @@ -2696,14 +2688,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Value *Shifted = Builder.CreateLShr(Masked, ShiftAmt); return replaceInstUsesWith(*II, Shifted); } - - if (auto *SrcC = dyn_cast<ConstantInt>(II->getArgOperand(0))) { - // constant folding. - APInt Result = - llvm::APIntOps::compressBits(SrcC->getValue(), MaskC->getValue()); - return replaceInstUsesWith(*II, - ConstantInt::get(II->getType(), Result)); - } } break; case Intrinsic::ptrmask: { >From 7f95f8b1f959733e851081c7a25dc975758b49ae Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <[email protected]> Date: Thu, 18 Jun 2026 12:29:58 +0100 Subject: [PATCH 4/4] Use PatternMatch to collect packed bits masks --- .../InstCombine/InstCombineCalls.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9e36aaaf8b508..1df156053e302 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2660,11 +2660,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return &CI; break; } - case Intrinsic::pdep: - if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + case Intrinsic::pdep: { + const APInt *MaskC; + if (match(II->getArgOperand(1), m_APInt(MaskC))) { unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, 
MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply + if (MaskC->isShiftedMask(MaskIdx, MaskLen)) { + // any single contiguous sequence of 1s anywhere in the mask simply // describes a subset of the input bits shifted to the appropriate // position. Replace with the straight forward IR. Value *Input = II->getArgOperand(0); @@ -2675,11 +2676,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } break; - case Intrinsic::pext: - if (auto *MaskC = dyn_cast<ConstantInt>(II->getArgOperand(1))) { + } + case Intrinsic::pext: { + const APInt *MaskC; + if (match(II->getArgOperand(1), m_APInt(MaskC))) { unsigned MaskIdx, MaskLen; - if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { - // any single contingous sequence of 1s anywhere in the mask simply + if (MaskC->isShiftedMask(MaskIdx, MaskLen)) { + // any single contiguous sequence of 1s anywhere in the mask simply // describes a subset of the input bits shifted to the appropriate // position. Replace with the straight forward IR. Value *Input = II->getArgOperand(0); @@ -2690,6 +2693,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } break; + } case Intrinsic::ptrmask: { unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType()); KnownBits Known(BitWidth); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
