[llvm-branch-commits] [llvm] release/18.x: [SROA]: Only defer trying partial sized ptr or ptr vector types (PR #86114)

2024-03-21 Thread Jeffrey Byrnes via llvm-branch-commits

https://github.com/jrbyrnes approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/86114
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e0fb937 - debugging v2i8/v3i8

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-13T11:52:20-07:00
New Revision: e0fb937455d9339a286f82fc2a2a9c38a0370831

URL: 
https://github.com/llvm/llvm-project/commit/e0fb937455d9339a286f82fc2a2a9c38a0370831
DIFF: 
https://github.com/llvm/llvm-project/commit/e0fb937455d9339a286f82fc2a2a9c38a0370831.diff

LOG: debugging v2i8/v3i8

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 629e7b84cf71..528ee108408f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -966,12 +966,32 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
 return;
 
 #ifndef NDEBUG
-  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+/*
+  for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
+errs() << "Checking legality of: \n";
+auto temp = Node->getOperand(i-1);
+temp.dump();
+errs() << "\n";
 assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
  TargetLowering::TypeLegal &&
"Unexpected illegal type!");
+  }
+*/
+  errs() << "Quick Legal Check\n";
+  for (const SDValue &Op : Node->op_values()) {
+Op.dump();
+errs() << "\n";
+assert(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal &&
+   "Unexpected illegal type!");
+  }
+
+  errs() << "Full Legal Check\n";
 
   for (const SDValue &Op : Node->op_values()) {
+errs() << "Checking op: \n";
+Op.dump();
+errs() << "\n";
 if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
   TargetLowering::TypeLegal) errs() << 
"TargetLowering::TypeLegal\n";
 if (Op.getOpcode() == ISD::Register) errs() << "Register\n";

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp 
b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 973631848662..363bafb48c55 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -954,12 +954,16 @@ void TargetLoweringBase::setJumpIsExpensive(bool 
isExpensive) {
 
 TargetLoweringBase::LegalizeKind
 TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
+  errs() << "in TLB::getTypeConv\n";
   // If this is a simple type, use the ComputeRegisterProp mechanism.
   if (VT.isSimple()) {
+errs() << "isSimple]\n";
 MVT SVT = VT.getSimpleVT();
 assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
 MVT NVT = TransformToType[SVT.SimpleTy];
+errs() << "Found TypeTransform" << (int)NVT.SimpleTy << "\n";
 LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
+errs() << "Found TypeAction: " << (int)LA << "\n";
 
 assert((LA == TypeLegal || LA == TypeSoftenFloat ||
 LA == TypeSoftPromoteHalf ||

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f962e49418c5..f92bde72867a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -245,7 +245,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 MVT::v2f64,  MVT::v4i16,  MVT::v4f16,  MVT::v3i64,  MVT::v3f64,
 MVT::v6i32,  MVT::v6f32,  MVT::v4i64,  MVT::v4f64,  MVT::v8i64,
 MVT::v8f64,  MVT::v8i16,  MVT::v8f16,  MVT::v16i16, MVT::v16f16,
-MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32, MVT::v4i8}) {
+MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32, MVT::v4i8,
+MVT::v2i8}) {
 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
   switch (Op) {
   case ISD::LOAD:
@@ -5777,6 +5778,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue 
Op,
   SelectionDAG &DAG) const {
   SDLoc SL(Op);
 
+
+
   EVT ResultVT = Op.getValueType();
   SDValue Vec = Op.getOperand(0);
   SDValue Idx = Op.getOperand(1);
@@ -5784,6 +5787,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue 
Op,
   unsigned VecSize = VecVT.getSizeInBits();
   EVT EltVT = VecVT.getVectorElementType();
 
+  errs() << "found EVE with res: " << ResultVT.getEVTString() << " and src: " 
<< VecVT.getEVTString() << "\n";
+
   DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
 
   // Make sure we do any optimizations that will make it easier to fold
@@ -5859,6 +5864,11 @@ SDValue 
SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
 return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
   }
 
+  if (ResultVT == MVT::i8) {
+SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt);
+return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
+  }
+
   return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
 }
 
@@ -6038,11 +6048,11 @

[llvm-branch-commits] [llvm] f431123 - resolved issues with ret v2i8

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-13T14:13:27-07:00
New Revision: f431123ac5be268c4707d7f16878039c6051e71c

URL: 
https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c
DIFF: 
https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c.diff

LOG: resolved issues with ret v2i8

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528ee108408f..081d8d96c9e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -958,6 +958,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
 
 /// Return a legal replacement for the given operation, with all legal 
operands.
 void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+  errs() << "\n\n\nDAG BEFORE\n";
+  DAG.dump();
+  errs() << "\n";
+
   LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
 
   // Allow illegal target nodes and illegal registers.
@@ -1310,10 +1314,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
   return;
 case TargetLowering::Custom:
+  errs() << "from legalizeDAG.cpp\n";
   LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
   // FIXME: The handling for custom lowering with multiple results is
   // a complete mess.
   if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+errs() << "TLI.LowerOperation returned\n";
 if (!(Res.getNode() != Node || Res.getResNo() != 0))
   return;
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 95cd5371814e..5573acb5f6e5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1131,6 +1131,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
 void SelectionDAGBuilder::visit(const Instruction &I) {
   // Set up outgoing PHI node register values before emitting the terminator.
   if (I.isTerminator()) {
+errs() << "Is terminator\n";
 HandlePHINodesInSuccessorBlocks(I.getParent());
   }
 
@@ -1149,6 +1150,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
 DAG, [&](SDNode *) { NodeInserted = true; });
   }
 
+  errs() << "calling visit with opcode " << I.getOpcodeName() << "\n";
   visit(I.getOpcode(), I);
 
   if (!I.isTerminator() && !HasTailCall &&
@@ -1936,6 +1938,7 @@ void SelectionDAGBuilder::visitCatchSwitch(const 
CatchSwitchInst &CSI) {
 }
 
 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+  errs() << "in visitRet\n";
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   auto &DL = DAG.getDataLayout();
   SDValue Chain = getControlRoot();
@@ -1955,6 +1958,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   }
 
   if (!FuncInfo.CanLowerReturn) {
+errs() << "!CanLowerReturn\n";
 unsigned DemoteReg = FuncInfo.DemoteRegister;
 const Function *F = I.getParent()->getParent();
 
@@ -1998,9 +2002,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
 MVT::Other, Chains);
   } else if (I.getNumOperands() != 0) {
+errs() << "CanReturn && NumOpers !=0\n";
 SmallVector ValueVTs;
 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
 unsigned NumValues = ValueVTs.size();
+errs() << "NumValues: " << NumValues << "\n";
 if (NumValues) {
   SDValue RetOp = getValue(I.getOperand(0));
 
@@ -2027,9 +2033,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 
 CallingConv::ID CC = F->getCallingConv();
 
+errs() << "calling getNumRegs for CallConv\n";
 unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
 MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
 SmallVector Parts(NumParts);
+errs() << "Calling getCopyToParts with NumParts: " << NumParts << "\n";
 getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
@@ -2067,6 +2075,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 }
   }
 
+  errs() << "Made it passed end of condition\n";
+
   // Push in swifterror virtual register as the last element of Outs. This 
makes
   // sure swi

[llvm-branch-commits] [llvm] 30fc9fa - cleaned up print statements, checking load/store behavior

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-14T07:56:45-07:00
New Revision: 30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61

URL: 
https://github.com/llvm/llvm-project/commit/30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61
DIFF: 
https://github.com/llvm/llvm-project/commit/30fc9fa3a4695f99b0aaabcec2e05118e8ee4b61.diff

LOG: cleaned up print statements, checking load/store behavior

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 081d8d96c9e2..fe358aa89881 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -958,10 +958,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
 
 /// Return a legal replacement for the given operation, with all legal 
operands.
 void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
-  errs() << "\n\n\nDAG BEFORE\n";
-  DAG.dump();
-  errs() << "\n";
-
   LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
 
   // Allow illegal target nodes and illegal registers.
@@ -970,35 +966,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
 return;
 
 #ifndef NDEBUG
-/*
+
   for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
-errs() << "Checking legality of: \n";
-auto temp = Node->getOperand(i-1);
-temp.dump();
-errs() << "\n";
 assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
  TargetLowering::TypeLegal &&
"Unexpected illegal type!");
   }
-*/
-  errs() << "Quick Legal Check\n";
-  for (const SDValue &Op : Node->op_values()) {
-Op.dump();
-errs() << "\n";
-assert(TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
- TargetLowering::TypeLegal &&
-   "Unexpected illegal type!");
-  }
-
-  errs() << "Full Legal Check\n";
 
   for (const SDValue &Op : Node->op_values()) {
-errs() << "Checking op: \n";
-Op.dump();
-errs() << "\n";
-if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
-  TargetLowering::TypeLegal) errs() << 
"TargetLowering::TypeLegal\n";
-if (Op.getOpcode() == ISD::Register) errs() << "Register\n";
 assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
   TargetLowering::TypeLegal ||
 Op.getOpcode() == ISD::TargetConstant ||
@@ -1314,12 +1289,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
   return;
 case TargetLowering::Custom:
-  errs() << "from legalizeDAG.cpp\n";
   LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
   // FIXME: The handling for custom lowering with multiple results is
   // a complete mess.
   if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
-errs() << "TLI.LowerOperation returned\n";
 if (!(Res.getNode() != Node || Res.getResNo() != 0))
   return;
 

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp 
b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 363bafb48c55..e6b577b4cc68 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -954,16 +954,13 @@ void TargetLoweringBase::setJumpIsExpensive(bool 
isExpensive) {
 
 TargetLoweringBase::LegalizeKind
 TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
-  errs() << "in TLB::getTypeConv\n";
   // If this is a simple type, use the ComputeRegisterProp mechanism.
   if (VT.isSimple()) {
-errs() << "isSimple]\n";
 MVT SVT = VT.getSimpleVT();
 assert((unsigned)SVT.SimpleTy < std::size(TransformToType));
 MVT NVT = TransformToType[SVT.SimpleTy];
-errs() << "Found TypeTransform" << (int)NVT.SimpleTy << "\n";
 LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
-errs() << "Found TypeAction: " << (int)LA << "\n";
+
 
 assert((LA == TypeLegal || LA == TypeSoftenFloat ||
 LA == TypeSoftPromoteHalf ||



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 85982d6 - new selection patterns for load/store

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-14T09:52:11-07:00
New Revision: 85982d60133d2bfdabb33dbf95b1dce3f9754ae7

URL: 
https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7
DIFF: 
https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7.diff

LOG: new selection patterns for load/store

Added: 


Modified: 
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 09f3035c6215..47563dafe56c 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -949,6 +949,10 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", 
v2i32, load_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
 
+//defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i8, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", v2i8, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", v2i8, atomic_load_8_global>;
+
 defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
   "buffer_store_byte", i32, truncstorei8_global
 >;
@@ -1758,6 +1762,15 @@ defm : MUBUFLoad_Pattern ;
 
 defm : MUBUFLoad_Pattern ;
 
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+defm : MUBUFLoad_Pattern ;
+//defm : MUBUFLoad_Pattern ;
+
 } // End OtherPredicates = [Has16BitInsts]
 
 multiclass MUBUFScratchLoadPat ;
 defm : MUBUFScratchLoadPat ;
 
+
+defm : MUBUFScratchLoadPat ;
+defm : MUBUFScratchLoadPat ;
+defm : MUBUFScratchLoadPat ;
+defm : MUBUFScratchLoadPat ;
+//defm : MUBUFScratchLoadPat ;
+
 foreach vt = Reg32Types.types in {
 defm : MUBUFScratchLoadPat ;
 }
@@ -1847,6 +1867,9 @@ defm : MUBUFStore_Atomic_Pattern 
;
 defm : MUBUFStore_Atomic_Pattern ;
 defm : MUBUFStore_Atomic_Pattern ;
+//defm : MUBUFStore_Atomic_Pattern ;
+defm : MUBUFStore_Atomic_Pattern ;
+defm : MUBUFStore_Atomic_Pattern ;
 } // End Predicates = isGFX6GFX7
 
 
@@ -1861,6 +1884,9 @@ multiclass MUBUFStore_Pattern ;
 defm : MUBUFStore_Pattern ;
+defm : MUBUFStore_Pattern ;
+defm : MUBUFStore_Pattern ;
+//defm : MUBUFStore_Pattern ;
 
 multiclass MUBUFScratchStorePat ;
 defm : MUBUFScratchStorePat ;
 defm : MUBUFScratchStorePat ;
+defm : MUBUFScratchStorePat ;
+defm : MUBUFScratchStorePat ;
+//defm : MUBUFScratchStorePat ;
 
 foreach vt = Reg32Types.types in {
 defm : MUBUFScratchStorePat ;

diff  --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td 
b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index b7f9c558f83a..2f349d12167c 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1106,14 +1106,21 @@ let OtherPredicates = [HasFlatAddressSpace] in {
 
 def : FlatLoadPat ;
 def : FlatLoadPat ;
+//def : FlatLoadPat ;
+def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
+def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
+def : FlatLoadPat ;
+def : FlatLoadPat ;
+def : FlatLoadPat ;
+//def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
@@ -1125,6 +1132,9 @@ def : FlatLoadPat ;
 
 def : FlatStorePat ;
 def : FlatStorePat ;
+def : FlatStorePat ;
+//def : FlatStorePat ;
+def : FlatStorePat ;
 
 foreach vt = Reg32Types.types in {
 def : FlatLoadPat ;
@@ -1150,6 +1160,10 @@ def : FlatStoreAtomicPat ;
 def : FlatStoreAtomicPat ;
 def : FlatStoreAtomicPat ;
 
+//def : FlatStoreAtomicPat ;
+def : FlatStoreAtomicPat ;
+def : FlatStoreAtomicPat ;
+
 foreach as = [ "flat", "global" ] in {
 defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
 defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
@@ -1350,18 +1364,29 @@ let OtherPredicates = [HasFlatGlobalInsts] in {
 
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
+//defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
+//defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
 defm : GlobalFLATLoadPats ;
+defm : GlobalFLATLoadPats ;
+
+
+
 
 foreach vt = Reg32Types.types in {
 defm : GlobalFLATLoadPats ;
@@ -1392,6 +1417,11 @@ defm : GlobalFLATStorePats ;
 defm : GlobalFLATStor

[llvm-branch-commits] [llvm] ec1747c - Able to produce good initial SelectionDAG for ret. resolved extract_subvector legalizing, able to build the test.ll

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-13T10:48:41-07:00
New Revision: ec1747cb71d0db73b268d17367b83652cd4e2ad3

URL: 
https://github.com/llvm/llvm-project/commit/ec1747cb71d0db73b268d17367b83652cd4e2ad3
DIFF: 
https://github.com/llvm/llvm-project/commit/ec1747cb71d0db73b268d17367b83652cd4e2ad3.diff

LOG: Able to produce good initial SelectionDAG for ret. resolved 
extract_subvector legalizing, able to build the test.ll

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e62f57c536b37..629e7b84cf71d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -971,12 +971,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
  TargetLowering::TypeLegal &&
"Unexpected illegal type!");
 
-  for (const SDValue &Op : Node->op_values())
+  for (const SDValue &Op : Node->op_values()) {
+if (TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+  TargetLowering::TypeLegal) errs() << 
"TargetLowering::TypeLegal\n";
+if (Op.getOpcode() == ISD::Register) errs() << "Register\n";
 assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
   TargetLowering::TypeLegal ||
 Op.getOpcode() == ISD::TargetConstant ||
 Op.getOpcode() == ISD::Register) &&
 "Unexpected illegal type!");
+  }
 #endif
 
   // Figure out the correct action; the way to query this varies by opcode

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td 
b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index b6c66077675ff..523788106db63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -22,28 +22,28 @@ def CC_SI_Gfx : CallingConv<[
   // 32 is reserved for the stack pointer
   // 33 is reserved for the frame pointer
   // 34 is reserved for the base pointer
-  CCIfInReg>>,
 
-  CCIfNotInReg>>,
 
-  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
+  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>>
 ]>;
 
 def RetCC_SI_Gfx : CallingConv<[
   CCIfType<[i1], CCPromoteToType>,
   CCIfType<[i1, i16], CCIfExtend>>,
 
-  CCIfNotInReg>>,
 
   // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
-  CCIfNotInReg>>,
-  CCIfType<[i32, i16] , CCAssignToReg<[
+  CCIfType<[i32, i16, v4i8] , CCAssignToReg<[
 SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
 SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
 SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -183,19 +183,19 @@ def CC_AMDGPU_Func : CallingConv<[
   CCIfByVal>,
   CCIfType<[i1], CCPromoteToType>,
   CCIfType<[i8, i16], CCIfExtend>>,
-  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
+  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, i1], CCAssignToReg<[
 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
 VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>
+  CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>>
 ]>;
 
 // Calling convention for leaf functions
 def RetCC_AMDGPU_Func : CallingConv<[
   CCIfType<[i1], CCPromoteToType>,
   CCIfType<[i1, i16], CCIfExtend>>,
-  CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
+  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8], CCAssignToReg<[
 VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
 VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
 VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9c2247f336ee1..9980e851f9820 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -322,7 +322,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const 
TargetMachine &TM,
MVT::v7i32,  MVT::v8f32,  MVT::v8i32,  MVT::v16f16, MVT::v16i16,
MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, MVT::v2f64,
MVT::v2i64,  MVT::v3f64,  MVT::v3i64,  MVT::v4f64,  MVT::v4i64,
-   MVT::v8f64,  MVT::v8i64,  MVT::v16f64, MVT::v16i64},
+   MVT::v8f64,  MVT::v8i64,  MVT::v16f64, MVT::v16i64, MVT::v2i8,
+   MVT::v4i8},
   Custom);
 
   setOperat

[llvm-branch-commits] [llvm] bb408f1 - save for switching

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-14T14:54:05-07:00
New Revision: bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1

URL: 
https://github.com/llvm/llvm-project/commit/bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1
DIFF: 
https://github.com/llvm/llvm-project/commit/bb408f1e1a8a97826b28e3e9327bd8ad91dbd5a1.diff

LOG: save for switching

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index fe358aa89881..6c1c296d8014 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -974,6 +974,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   }
 
   for (const SDValue &Op : Node->op_values()) {
+errs() << "Checking op: ";
+Op.dump();
+errs() << "\n";
 assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
   TargetLowering::TypeLegal ||
 Op.getOpcode() == ISD::TargetConstant ||

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index abb864c6a829..c32f92cd0da0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -84,8 +84,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
 
   addRegisterClass(MVT::v4i8, &AMDGPU::SReg_32RegClass);
-  //addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass);
+  addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass);
   addRegisterClass(MVT::i8, &AMDGPU::SReg_32RegClass);
+  //addRegisterClass(MVT::i8, &AMDGPU::VReg_32RegClass);
 
   addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
 
@@ -5719,9 +5720,14 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue 
Op,
   unsigned EltSize = EltVT.getSizeInBits();
   SDLoc SL(Op);
 
+
   // Specially handle the case of v4i16 with static indexing.
   unsigned NumElts = VecVT.getVectorNumElements();
   auto KIdx = dyn_cast(Idx);
+
+  errs() << "legalizing insert_ve with num elts, eltsize " << NumElts << " " 
<< EltSize << "\n";
+
+
   if (NumElts == 4 && EltSize == 16 && KIdx) {
 SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d88272fc485c..f6644d131b68 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2788,7 +2788,29 @@ def : GCNPat <
   (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
 >;
 
+/*
+def : GCNPat <
+  (v4i8 (build_vector (i8:$src0), (i8:$src1), (i8:$src2), (i8:$src3))),
+  (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 
(V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 
(i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))
+>;
+
+
+def : GCNPat <
+  (v2i8 (build_vector (i8:$src0), (i8:$src1))),
+  (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), 
SReg_32:$src0)))
+>;
+
 
+def : GCNPat <
+  (v2i8 (build_vector i8:$src0, (i8 undef))),
+  (COPY $src0)
+>;
+
+def : GCNPat <
+  (v2i8 (DivergentBinFrag (i8 undef), (i8 SReg_32:$src1))),
+  (v2i8 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1))
+>;
+*/
 
 foreach Ty = [i16, f16] in {
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 39bf272 - tablegen accepts i8 as operand in patterns

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-14T16:23:25-07:00
New Revision: 39bf272b7d5086b982f0ec4b4aa545310f8ef20a

URL: 
https://github.com/llvm/llvm-project/commit/39bf272b7d5086b982f0ec4b4aa545310f8ef20a
DIFF: 
https://github.com/llvm/llvm-project/commit/39bf272b7d5086b982f0ec4b4aa545310f8ef20a.diff

LOG: tablegen accepts i8 as operand in patterns

Added: 


Modified: 
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f6644d131b68..b0bf6aca56b5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1308,6 +1308,7 @@ foreach Index = 0-31 in {
 // FIXME: Why do only some of these type combinations for SReg and
 // VReg?
 // 16-bit bitcast
+
 def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
@@ -2788,13 +2789,58 @@ def : GCNPat <
   (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
 >;
 
-/*
 def : GCNPat <
-  (v4i8 (build_vector (i8:$src0), (i8:$src1), (i8:$src2), (i8:$src3))),
-  (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 
(V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 
(i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))
+  (v2i8 (DivergentBinFrag (i8 0), (i8 SReg_32:$src1))),
+  (v2i8 (V_LSHLREV_B32_e64 (i8 8), SReg_32:$src1))
 >;
 
+def : GCNPat <
+  (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 
SReg_32:$src2), (i8 SReg_32:$src3))),
+  
+
+  (v4i8 
+
+   (V_OR_B32_e64 
+
+   (S_LSHL_B32 
+   SReg_32:$src3, 
+   (i32 24)
+   )
+   , 
+ 
+   (V_OR_B32_e64 
+
+   (S_LSHL_B32 
+   SReg_32:$src2, 
+   (i32 16)
+   )
+   , 
+
+   (V_OR_B32_e64 
+   
+   (S_LSHL_B32 
+   
SReg_32:$src1, 
+   (i32 8)
+   )
+   , 
+   SReg_32:$src0
+   )
+
+   )
 
+   )
+
+   
+)
+>;
+
+/*
+def : GCNPat <
+  (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 
SReg_32:$src2), (i8 SReg_32:$src3))),
+  (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 
(V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 
(i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))
+>;
+*/
+/*
 def : GCNPat <
   (v2i8 (build_vector (i8:$src0), (i8:$src1))),
   (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), 
SReg_32:$src0)))
@@ -2808,10 +2854,11 @@ def : GCNPat <
 
 def : GCNPat <
   (v2i8 (DivergentBinFrag (i8 undef), (i8 SReg_32:$src1))),
-  (v2i8 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1))
+  (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)
 >;
 */
 
+
 foreach Ty = [i16, f16] in {
 
 defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td 
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index c07333b17ff3..4db31c87ac06 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -369,7 +369,7 @@ def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
 }
 
 // SGPR 32-bit registers
-def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, 
v4i8, v2i8], 32,
+def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, 
v4i8, v2i8, i8], 32,
 (add (sequence "SGPR%u", 0, 105))> {
   // Give all SGPR classes higher priority than VGPR classes, because
   // we want to spill SGPRs to VGPRs.
@@ -406,7 +406,7 @@ def SGPR_512Regs : SIRegisterTuples.ret, 
SGPR_32, 105, 4, 16, "s"
 def SGPR_1024Regs : SIRegisterTuples.ret, SGPR_32, 105, 4, 32, 
"s">;
 
 // Trap handler TMP 32-bit registers
-def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v4i8, v2i8], 
32,
+def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16, v4i8, v2i8, 
i8], 32,
 (add (sequence "TTMP%u", 0, 15))> {
   let isAllocatable = 0;
   let HasSGPR = 1;
@@ -527,8 +527,8 @@ class RegisterTypes<list<ValueType> reg_types> {
   list<ValueType> types = reg_types;
 }
 
-

[llvm-branch-commits] [llvm] 37c65eb - legalize IVE, v2i8, v4i8

2022-10-17 Thread Jeffrey Byrnes via llvm-branch-commits

Author: Jeffrey Byrnes
Date: 2022-10-17T15:29:54-07:00
New Revision: 37c65ebbcc0b7106fba7bb791a36d7ddabc60ece

URL: 
https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece
DIFF: 
https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece.diff

LOG: legalize IVE, v2i8, v4i8

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 62ac1fcd95ce..37d907059687 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -577,6 +577,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   break;
 }
 
+if (VT.getScalarSizeInBits() == 8) {
+  break;
+}
+
 assert(VT.getVectorElementType().bitsEq(MVT::i32));
 unsigned RegClassID =
 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c32f92cd0da0..f25bcdd28d9c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -171,7 +171,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
   MVT::v32i32},
  Custom);
-
+ 
+  //setTruncStoreAction(MVT::i8, MVT::i32, Expand);
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
   setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
   setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -5729,6 +5730,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue 
Op,
 
 
   if (NumElts == 4 && EltSize == 16 && KIdx) {
+//errs() << "special case for v4i16\n";
+//errs() << "VecVT, Op1VT, EltVT: ";
+errs() << VecVT.getEVTString() << " " << 
InsVal.getValueType().getEVTString() << " ";
+errs() << EltVT.getEVTString() << "\n";
+
+
 SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
 
 SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
@@ -5755,6 +5762,46 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue 
Op,
 return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
   }
 
+  if (NumElts == 4 && EltSize == 8 && KIdx) {
+errs() << "special case for v4i8\n";
+errs() << "VecVT, Op1VT, EltVT: ";
+errs() << VecVT.getEVTString() << " " << 
InsVal.getValueType().getEVTString() << " ";
+errs() << EltVT.getEVTString() << "\n";
+
+
+errs() << "First bitcast\n";
+SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Vec);
+
+SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+ DAG.getConstant(0, SL, MVT::i32));
+SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+ DAG.getConstant(1, SL, MVT::i32));
+
+errs() << "Second bitcast\n";
+SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, LoHalf);
+errs() << "Third bitcast\n";
+SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, HiHalf);
+
+unsigned Idx = KIdx->getZExtValue();
+bool InsertLo = Idx < 2;
+SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i8,
+  InsertLo ? LoVec : HiVec,
+  DAG.getNode(ISD::BITCAST, SL, MVT::i8, InsVal),
+  DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
+
+errs() << "Fourth bitcast\n";
+InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsHalf);
+
+SDValue Concat = InsertLo ?
+  DAG.getBuildVector(MVT::v2i16, SL, { InsHalf, HiHalf }) :
+  DAG.getBuildVector(MVT::v2i16, SL, { LoHalf, InsHalf });
+
+return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
+  }
+
+
+
+
   // Static indexing does not lower to stack access, and hence there is no need
   // for special custom lowering to avoid stack access.
   if (isa<ConstantSDNode>(Idx))
@@ -5885,11 +5932,12 @@ SDValue 
SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
 return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
   }
 
+/*
   if (ResultVT == MVT::i8) {
 SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt);
 return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
   }
-
+*/
   return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b0bf6aca56b5..f9129eaf3828 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2400,6 +2400,8 @@ def : GCNPat <
   (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
 >;
 
+
+
 def : GCNPat <
   (i1 (UniformUnaryFrag i16:$a)),