[llvm-branch-commits] [llvm] release/18.x: [SROA]: Only defer trying partial sized ptr or ptr vector types (PR #86114)
https://github.com/jrbyrnes approved this pull request. Review comment: "LGTM". Pull request: https://github.com/llvm/llvm-project/pull/86114 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] e0fb937 - debugging v2i8/v3i8
Author: Jeffrey Byrnes Date: 2022-10-13T11:52:20-07:00 New Revision: e0fb937455d9339a286f82fc2a2a9c38a0370831 URL: https://github.com/llvm/llvm-project/commit/e0fb937455d9339a286f82fc2a2a9c38a0370831 DIFF: https://github.com/llvm/llvm-project/commit/e0fb937455d9339a286f82fc2a2a9c38a0370831.diff LOG: debugging v2i8/v3i8 Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/TargetLoweringBase.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 629e7b84cf71..528ee108408f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -966,12 +966,32 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; #ifndef NDEBUG - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) +/* + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { +errs() << "Checking legality of: \n"; +auto temp = Node->getOperand(i-1); +temp.dump(); +errs() << "\n"; assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal && "Unexpected illegal type!"); + } +*/ + errs() << "Quick Legal Check\n"; + for (const SDValue &Op : Node->op_values()) { +Op.dump(); +errs() << "\n"; +assert(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal && + "Unexpected illegal type!"); + } + + errs() << "Full Legal Check\n"; for (const SDValue &Op : Node->op_values()) { +errs() << "Checking op: \n"; +Op.dump(); +errs() << "\n"; if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal) errs() << "TargetLowering::TypeLegal\n"; if (Op.getOpcode() == ISD::Register) errs() << "Register\n"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 973631848662..363bafb48c55 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ 
b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -954,12 +954,16 @@ void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { TargetLoweringBase::LegalizeKind TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + errs() << "in TLB::getTypeConv\n"; // If this is a simple type, use the ComputeRegisterProp mechanism. if (VT.isSimple()) { +errs() << "isSimple]\n"; MVT SVT = VT.getSimpleVT(); assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); MVT NVT = TransformToType[SVT.SimpleTy]; +errs() << "Found TypeTransform" << (int)NVT.SimpleTy << "\n"; LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); +errs() << "Found TypeAction: " << (int)LA << "\n"; assert((LA == TypeLegal || LA == TypeSoftenFloat || LA == TypeSoftPromoteHalf || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f962e49418c5..f92bde72867a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -245,7 +245,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16, -MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32, MVT::v4i8}) { +MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32, MVT::v4i8, +MVT::v2i8}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -5777,6 +5778,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); + + EVT ResultVT = Op.getValueType(); SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); @@ -5784,6 +5787,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, unsigned VecSize = VecVT.getSizeInBits(); EVT EltVT = VecVT.getVectorElementType(); + errs() << "found EVE with res: " << ResultVT.getEVTString() << " and src: " << 
VecVT.getEVTString() << "\n"; + DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); // Make sure we do any optimizations that will make it easier to fold @@ -5859,6 +5864,11 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result); } + if (ResultVT == MVT::i8) { +SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt); +return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result); + } + return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT); } @@ -6038,11 +6048,11 @
[llvm-branch-commits] [llvm] f431123 - resolved issues with ret v2i8
Author: Jeffrey Byrnes Date: 2022-10-13T14:13:27-07:00 New Revision: f431123ac5be268c4707d7f16878039c6051e71c URL: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c DIFF: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c.diff LOG: resolved issues with ret v2i8 Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 528ee108408f..081d8d96c9e2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -958,6 +958,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + errs() << "\n\n\nDAG BEFORE\n"; + DAG.dump(); + errs() << "\n"; + LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. @@ -1310,10 +1314,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: + errs() << "from legalizeDAG.cpp\n"; LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { +errs() << "TLI.LowerOperation returned\n"; if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 95cd5371814e..5573acb5f6e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1131,6 +1131,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { +errs() << "Is terminator\n"; HandlePHINodesInSuccessorBlocks(I.getParent()); } @@ -1149,6 +1150,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { DAG, [&](SDNode *) { NodeInserted = true; }); } + errs() << "calling visit with opcode " << I.getOpcodeName() << "\n"; visit(I.getOpcode(), I); if (!I.isTerminator() && !HasTailCall && @@ -1936,6 +1938,7 @@ void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + errs() << "in visitRet\n"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); @@ -1955,6 +1958,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } if (!FuncInfo.CanLowerReturn) { +errs() << "!CanLowerReturn\n"; unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); @@ -1998,9 +2002,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, Chains); } else if (I.getNumOperands() != 0) { +errs() << "CanReturn && NumOpers !=0\n"; SmallVector ValueVTs; ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); +errs() << "NumValues: " << NumValues << "\n"; if (NumValues) { SDValue 
RetOp = getValue(I.getOperand(0)); @@ -2027,9 +2033,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CC = F->getCallingConv(); +errs() << "calling getNumRegs for CallConv\n"; unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT); MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT); SmallVector Parts(NumParts); +errs() << "Calling getCopyToParts with NumParts: " << NumParts << "\n"; getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, CC, ExtendKind); @@ -2067,6 +2075,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } } + errs() << "Made it passed end of condition\n"; + // Push in swifterror virtual register as the last element of Outs. This makes // sure swi
[llvm-branch-commits] [llvm] 30fc9fa - cleaned up print statements, checking load/store behavior
Author: Jeffrey Byrnes Date: 2022-10-14T07:56:45-07:00 New Revision: 30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61 URL: https://github.com/llvm/llvm-project/commit/30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61 DIFF: https://github.com/llvm/llvm-project/commit/30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61.diff LOG: cleaned up print statements, checking load/store behavior Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/TargetLoweringBase.cpp Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 081d8d96c9e2..fe358aa89881 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -958,10 +958,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { - errs() << "\n\n\nDAG BEFORE\n"; - DAG.dump(); - errs() << "\n"; - LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. 
@@ -970,35 +966,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; #ifndef NDEBUG -/* + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { -errs() << "Checking legality of: \n"; -auto temp = Node->getOperand(i-1); -temp.dump(); -errs() << "\n"; assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal && "Unexpected illegal type!"); } -*/ - errs() << "Quick Legal Check\n"; - for (const SDValue &Op : Node->op_values()) { -Op.dump(); -errs() << "\n"; -assert(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == - TargetLowering::TypeLegal && - "Unexpected illegal type!"); - } - - errs() << "Full Legal Check\n"; for (const SDValue &Op : Node->op_values()) { -errs() << "Checking op: \n"; -Op.dump(); -errs() << "\n"; -if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == - TargetLowering::TypeLegal) errs() << "TargetLowering::TypeLegal\n"; -if (Op.getOpcode() == ISD::Register) errs() << "Register\n"; assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || Op.getOpcode() == ISD::TargetConstant || @@ -1314,12 +1289,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: - errs() << "from legalizeDAG.cpp\n"; LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { -errs() << "TLI.LowerOperation returned\n"; if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 363bafb48c55..e6b577b4cc68 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -954,16 +954,13 @@ void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { TargetLoweringBase::LegalizeKind TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { - errs() << "in TLB::getTypeConv\n"; // If this is a simple type, use the ComputeRegisterProp mechanism. if (VT.isSimple()) { -errs() << "isSimple]\n"; MVT SVT = VT.getSimpleVT(); assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); MVT NVT = TransformToType[SVT.SimpleTy]; -errs() << "Found TypeTransform" << (int)NVT.SimpleTy << "\n"; LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); -errs() << "Found TypeAction: " << (int)LA << "\n"; + assert((LA == TypeLegal || LA == TypeSoftenFloat || LA == TypeSoftPromoteHalf || ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 85982d6 - new selection patterns for load/store
Author: Jeffrey Byrnes Date: 2022-10-14T09:52:11-07:00 New Revision: 85982d60133d2bfdabb33dbf95b1dce3f9754ae7 URL: https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7 DIFF: https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7.diff LOG: new selection patterns for load/store Added: Modified: llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/SIISelLowering.cpp Removed: diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 09f3035c6215..47563dafe56c 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -949,6 +949,10 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; +//defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i8, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", v2i8, atomic_load_16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", v2i8, atomic_load_8_global>; + defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < "buffer_store_byte", i32, truncstorei8_global >; @@ -1758,6 +1762,15 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +//defm : MUBUFLoad_Pattern ; + } // End OtherPredicates = [Has16BitInsts] multiclass MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; + +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +//defm : MUBUFScratchLoadPat ; + foreach vt = Reg32Types.types in { defm : MUBUFScratchLoadPat ; } @@ -1847,6 +1867,9 @@ defm : MUBUFStore_Atomic_Pattern 
; defm : MUBUFStore_Atomic_Pattern ; defm : MUBUFStore_Atomic_Pattern ; +//defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; +defm : MUBUFStore_Atomic_Pattern ; } // End Predicates = isGFX6GFX7 @@ -1861,6 +1884,9 @@ multiclass MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +defm : MUBUFStore_Pattern ; +//defm : MUBUFStore_Pattern ; multiclass MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +//defm : MUBUFScratchStorePat ; foreach vt = Reg32Types.types in { defm : MUBUFScratchStorePat ; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index b7f9c558f83a..2f349d12167c 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1106,14 +1106,21 @@ let OtherPredicates = [HasFlatAddressSpace] in { def : FlatLoadPat ; def : FlatLoadPat ; +//def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +//def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -1125,6 +1132,9 @@ def : FlatLoadPat ; def : FlatStorePat ; def : FlatStorePat ; +def : FlatStorePat ; +//def : FlatStorePat ; +def : FlatStorePat ; foreach vt = Reg32Types.types in { def : FlatLoadPat ; @@ -1150,6 +1160,10 @@ def : FlatStoreAtomicPat ; def : FlatStoreAtomicPat ; def : FlatStoreAtomicPat ; +//def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; + foreach as = [ "flat", "global" ] in { defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; @@ -1350,18 +1364,29 @@ let OtherPredicates = 
[HasFlatGlobalInsts] in { defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +//defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; +//defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; + + + foreach vt = Reg32Types.types in { defm : GlobalFLATLoadPats ; @@ -1392,6 +1417,11 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATStor
[llvm-branch-commits] [llvm] ec1747c - Able to produce good initial SelectionDAG for ret. resolved extract_subvector legalizing, able to build the test.ll
Author: Jeffrey Byrnes Date: 2022-10-13T10:48:41-07:00 New Revision: ec1747cb71d0db73b268d17367b83652cd4e2ad3 URL: https://github.com/llvm/llvm-project/commit/ec1747cb71d0db73b268d17367b83652cd4e2ad3 DIFF: https://github.com/llvm/llvm-project/commit/ec1747cb71d0db73b268d17367b83652cd4e2ad3.diff LOG: Able to produce good initial SelectionDAG for ret. resolved extract_subvector legalizing, able to build the test.ll Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstructions.td llvm/lib/Target/AMDGPU/SIRegisterInfo.td Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e62f57c536b37..629e7b84cf71d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -971,12 +971,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { TargetLowering::TypeLegal && "Unexpected illegal type!"); - for (const SDValue &Op : Node->op_values()) + for (const SDValue &Op : Node->op_values()) { +if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal) errs() << "TargetLowering::TypeLegal\n"; +if (Op.getOpcode() == ISD::Register) errs() << "Register\n"; assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); + } #endif // Figure out the correct action; the way to query this varies by opcode diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index b6c66077675ff..523788106db63 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -22,28 +22,28 @@ def CC_SI_Gfx : CallingConv<[ // 32 is reserved for the stack 
pointer // 33 is reserved for the frame pointer // 34 is reserved for the base pointer - CCIfInReg>>, - CCIfNotInReg>>, - CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>> + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>> ]>; def RetCC_SI_Gfx : CallingConv<[ CCIfType<[i1], CCPromoteToType>, CCIfType<[i1, i16], CCIfExtend>>, - CCIfNotInReg>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. - CCIfNotInReg>>, - CCIfType<[i32, i16] , CCAssignToReg<[ + CCIfType<[i32, i16, v4i8] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, @@ -183,19 +183,19 @@ def CC_AMDGPU_Func : CallingConv<[ CCIfByVal>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8, i16], CCIfExtend>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, i1], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, - CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>> + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>> ]>; // Calling convention for leaf functions def RetCC_AMDGPU_Func : CallingConv<[ CCIfType<[i1], CCPromoteToType>, CCIfType<[i1, i16], CCIfExtend>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[ + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8], CCAssignToReg<[ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 9c2247f336ee1..9980e851f9820 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -322,7 +322,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, MVT::v4i64, - MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64}, + MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64, MVT::v2i8, + MVT::v4i8}, Custom); setOperat
[llvm-branch-commits] [llvm] bb408f1 - save for switching
Author: Jeffrey Byrnes Date: 2022-10-14T14:54:05-07:00 New Revision: bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1 URL: https://github.com/llvm/llvm-project/commit/bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1 DIFF: https://github.com/llvm/llvm-project/commit/bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1.diff LOG: save for switching Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstructions.td Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fe358aa89881..6c1c296d8014 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -974,6 +974,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } for (const SDValue &Op : Node->op_values()) { +errs() << "Checking op: "; +Op.dump(); +errs() << "\n"; assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || Op.getOpcode() == ISD::TargetConstant || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index abb864c6a829..c32f92cd0da0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -84,8 +84,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); addRegisterClass(MVT::v4i8, &AMDGPU::SReg_32RegClass); - //addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::i8, &AMDGPU::SReg_32RegClass); + //addRegisterClass(MVT::i8, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); @@ -5719,9 +5720,14 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, unsigned EltSize = EltVT.getSizeInBits(); SDLoc SL(Op); + // Specially handle the case of v4i16 with static indexing. 
unsigned NumElts = VecVT.getVectorNumElements(); auto KIdx = dyn_cast(Idx); + + errs() << "legalizing insert_ve with num elts, eltsize " << NumElts << " " << EltSize << "\n"; + + if (NumElts == 4 && EltSize == 16 && KIdx) { SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d88272fc485c..f6644d131b68 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2788,7 +2788,29 @@ def : GCNPat < (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +/* +def : GCNPat < + (v4i8 (build_vector (i8:$src0), (i8:$src1), (i8:$src2), (i8:$src3))), + (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0))) +>; + + +def : GCNPat < + (v2i8 (build_vector (i8:$src0), (i8:$src1))), + (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0))) +>; + +def : GCNPat < + (v2i8 (build_vector i8:$src0, (i8 undef))), + (COPY $src0) +>; + +def : GCNPat < + (v2i8 (DivergentBinFrag (i8 undef), (i8 SReg_32:$src1))), + (v2i8 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) +>; +*/ foreach Ty = [i16, f16] in { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 39bf272 - tablegen accepts i8 as operand in patterns
Author: Jeffrey Byrnes Date: 2022-10-14T16:23:25-07:00 New Revision: 39bf272b7d5086b982f0ec4b4aa545310f8ef20a URL: https://github.com/llvm/llvm-project/commit/39bf272b7d5086b982f0ec4b4aa545310f8ef20a DIFF: https://github.com/llvm/llvm-project/commit/39bf272b7d5086b982f0ec4b4aa545310f8ef20a.diff LOG: tablegen accepts i8 as operand in patterns Added: Modified: llvm/lib/Target/AMDGPU/SIInstructions.td llvm/lib/Target/AMDGPU/SIRegisterInfo.td Removed: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index f6644d131b68..b0bf6aca56b5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1308,6 +1308,7 @@ foreach Index = 0-31 in { // FIXME: Why do only some of these type combinations for SReg and // VReg? // 16-bit bitcast + def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -2788,13 +2789,58 @@ def : GCNPat < (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; -/* def : GCNPat < - (v4i8 (build_vector (i8:$src0), (i8:$src1), (i8:$src2), (i8:$src3))), - (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0))) + (v2i8 (DivergentBinFrag (i8 0), (i8 SReg_32:$src1))), + (v2i8 (V_LSHLREV_B32_e64 (i8 8), SReg_32:$src1)) >; +def : GCNPat < + (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))), + + + (v4i8 + + (V_OR_B32_e64 + + (S_LSHL_B32 + SReg_32:$src3, + (i32 24) + ) + , + + (V_OR_B32_e64 + + (S_LSHL_B32 + SReg_32:$src2, + (i32 16) + ) + , + + (V_OR_B32_e64 + + (S_LSHL_B32 + SReg_32:$src1, + (i32 8) + ) + , + SReg_32:$src0 + ) + + ) + ) + + +) +>; + +/* +def : GCNPat < + (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))), + (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 (V_OR_B32_e64 (i32 
(S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0))) +>; +*/ +/* def : GCNPat < (v2i8 (build_vector (i8:$src0), (i8:$src1))), (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0))) @@ -2808,10 +2854,11 @@ def : GCNPat < def : GCNPat < (v2i8 (DivergentBinFrag (i8 undef), (i8 SReg_32:$src1))), - (v2i8 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) + (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1) >; */ + foreach Ty = [i16, f16] in { defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index c07333b17ff3..4db31c87ac06 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -369,7 +369,7 @@ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, } // SGPR 32-bit registers -def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8], 32, +def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8, i8], 32, (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. @@ -406,7 +406,7 @@ def SGPR_512Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 16, "s" def SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers -def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v4i8, v2i8], 32, +def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v4i8, v2i8, i8], 32, (add (sequence "TTMP%u", 0, 15))> { let isAllocatable = 0; let HasSGPR = 1; @@ -527,8 +527,8 @@ class RegisterTypes reg_types> { list types = reg_types; } -
[llvm-branch-commits] [llvm] 37c65eb - legalize IVE, v2i8, v4i8
Author: Jeffrey Byrnes Date: 2022-10-17T15:29:54-07:00 New Revision: 37c65ebbcc0b7106fba7bb791a36d7ddabc60ece URL: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece DIFF: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece.diff LOG: legalize IVE, v2i8, v4i8 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstructions.td Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 62ac1fcd95ce..37d907059687 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -577,6 +577,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } +if (VT.getScalarSizeInBits() == 8) { + break; +} + assert(VT.getVectorElementType().bitsEq(MVT::i32)); unsigned RegClassID = SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID(); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c32f92cd0da0..f25bcdd28d9c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -171,7 +171,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1, MVT::v32i32}, Custom); - + + //setTruncStoreAction(MVT::i8, MVT::i32, Expand); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); @@ -5729,6 +5730,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, if (NumElts == 4 && EltSize == 16 && KIdx) { +//errs() << "special case for v4i16\n"; +//errs() << "VecVT, Op1VT, EltVT: "; +errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " "; +errs() << EltVT.getEVTString() << "\n"; + + SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, 
MVT::v2i32, Vec); SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec, @@ -5755,6 +5762,46 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat); } + if (NumElts == 4 && EltSize == 8 && KIdx) { +errs() << "special case for v4i8\n"; +errs() << "VecVT, Op1VT, EltVT: "; +errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " "; +errs() << EltVT.getEVTString() << "\n"; + + +errs() << "First bitcast\n"; +SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Vec); + +SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec, + DAG.getConstant(0, SL, MVT::i32)); +SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec, + DAG.getConstant(1, SL, MVT::i32)); + +errs() << "Second bitcast\n"; +SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, LoHalf); +errs() << "Third bitcast\n"; +SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, HiHalf); + +unsigned Idx = KIdx->getZExtValue(); +bool InsertLo = Idx < 2; +SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i8, + InsertLo ? LoVec : HiVec, + DAG.getNode(ISD::BITCAST, SL, MVT::i8, InsVal), + DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32)); + +errs() << "Fourth bitcast\n"; +InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsHalf); + +SDValue Concat = InsertLo ? + DAG.getBuildVector(MVT::v2i16, SL, { InsHalf, HiHalf }) : + DAG.getBuildVector(MVT::v2i16, SL, { LoHalf, InsHalf }); + +return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat); + } + + + + // Static indexing does not lower to stack access, and hence there is no need // for special custom lowering to avoid stack access. 
if (isa(Idx)) @@ -5885,11 +5932,12 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result); } +/* if (ResultVT == MVT::i8) { SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt); return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result); } - +*/ return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT); } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b0bf6aca56b5..f9129eaf3828 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2400,6 +2400,8 @@ def : GCNPat < (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1)) >; + + def : GCNPat < (i1 (UniformUnaryFrag i16:$a)),