mbrkusanin created this revision.
mbrkusanin added reviewers: atanasyan, petarj, sdardis, mstojanovic.
mbrkusanin added projects: LLVM, clang.
Herald added subscribers: cfe-commits, jrtc27, hiraditya, arichardson.

New intrinsics are implemented for when we need to port SIMD code from other 
architectures and only load or store portions of MSA registers.

The following intrinsics are added which only load/store element 0 of a vector:
v4i32 __builtin_msa_ldrq_w (const void *, imm_n2048_2044);
v2i64 __builtin_msa_ldr_d (const void *, imm_n4096_4088);
void __builtin_msa_strq_w (v4i32, void *, imm_n2048_2044);
void __builtin_msa_str_d (v2i64, void *, imm_n4096_4088);


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73644

Files:
  clang/include/clang/Basic/BuiltinsMips.def
  clang/lib/Headers/msa.h
  clang/lib/Sema/SemaChecking.cpp
  llvm/include/llvm/IR/IntrinsicsMips.td
  llvm/lib/Target/Mips/MipsISelLowering.cpp
  llvm/lib/Target/Mips/MipsISelLowering.h
  llvm/lib/Target/Mips/MipsMSAInstrInfo.td
  llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
  llvm/test/CodeGen/Mips/msa/ldr_str.ll

Index: llvm/test/CodeGen/Mips/msa/ldr_str.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Mips/msa/ldr_str.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=mips     -mcpu=mips32r5 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R5-EB
+; RUN: llc -march=mipsel   -mcpu=mips32r5 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R5-EL
+; RUN: llc -march=mips     -mcpu=mips32r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R6-EB
+; RUN: llc -march=mipsel   -mcpu=mips32r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R6-EL
+; RUN: llc -march=mips64   -mcpu=mips64r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS64R6
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS64R6
+
+; Test intrinsics for 4-byte and 8-byte MSA loads and stores.
+
+define void @llvm_mips_ldr_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    # implicit-def: $at
+; MIPS32R5-EB-NEXT:    lwr $1, 23($5)
+; MIPS32R5-EB-NEXT:    lwl $1, 20($5)
+; MIPS32R5-EB-NEXT:    # implicit-def: $v0
+; MIPS32R5-EB-NEXT:    lwr $2, 19($5)
+; MIPS32R5-EB-NEXT:    lwl $2, 16($5)
+; MIPS32R5-EB-NEXT:    fill.w $w0, $1
+; MIPS32R5-EB-NEXT:    insert.w $w0[1], $2
+; MIPS32R5-EB-NEXT:    st.d $w0, 0($4)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    # implicit-def: $at
+; MIPS32R5-EL-NEXT:    lwr $1, 16($5)
+; MIPS32R5-EL-NEXT:    lwl $1, 19($5)
+; MIPS32R5-EL-NEXT:    # implicit-def: $v0
+; MIPS32R5-EL-NEXT:    lwr $2, 20($5)
+; MIPS32R5-EL-NEXT:    lwl $2, 23($5)
+; MIPS32R5-EL-NEXT:    fill.w $w0, $1
+; MIPS32R5-EL-NEXT:    insert.w $w0[1], $2
+; MIPS32R5-EL-NEXT:    st.d $w0, 0($4)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    lw $1, 20($5)
+; MIPS32R6-EB-NEXT:    lw $2, 16($5)
+; MIPS32R6-EB-NEXT:    fill.w $w0, $1
+; MIPS32R6-EB-NEXT:    insert.w $w0[1], $2
+; MIPS32R6-EB-NEXT:    st.d $w0, 0($4)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    lw $1, 16($5)
+; MIPS32R6-EL-NEXT:    lw $2, 20($5)
+; MIPS32R6-EL-NEXT:    fill.w $w0, $1
+; MIPS32R6-EL-NEXT:    insert.w $w0[1], $2
+; MIPS32R6-EL-NEXT:    st.d $w0, 0($4)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_ldr_d_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld $1, 16($5)
+; MIPS64R6-NEXT:    fill.d $w0, $1
+; MIPS64R6-NEXT:    st.d $w0, 0($4)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = tail call <2 x i64> @llvm.mips.ldr.d(i8* %ptr, i32 16)
+  store <2 x i64> %0, <2 x i64>* %val
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ldr.d(i8*, i32) nounwind
+
+define void @llvm_mips_ldrq_w_test(<4 x i32>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    # implicit-def: $at
+; MIPS32R5-EB-NEXT:    lwr $1, 19($5)
+; MIPS32R5-EB-NEXT:    lwl $1, 16($5)
+; MIPS32R5-EB-NEXT:    fill.w $w0, $1
+; MIPS32R5-EB-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    # implicit-def: $at
+; MIPS32R5-EL-NEXT:    lwr $1, 16($5)
+; MIPS32R5-EL-NEXT:    lwl $1, 19($5)
+; MIPS32R5-EL-NEXT:    fill.w $w0, $1
+; MIPS32R5-EL-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    lw $1, 16($5)
+; MIPS32R6-EB-NEXT:    fill.w $w0, $1
+; MIPS32R6-EB-NEXT:    st.w $w0, 0($4)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    lw $1, 16($5)
+; MIPS32R6-EL-NEXT:    fill.w $w0, $1
+; MIPS32R6-EL-NEXT:    st.w $w0, 0($4)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_ldrq_w_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    lw $1, 16($5)
+; MIPS64R6-NEXT:    fill.w $w0, $1
+; MIPS64R6-NEXT:    st.w $w0, 0($4)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = tail call <4 x i32> @llvm.mips.ldrq.w(i8* %ptr, i32 16)
+  store <4 x i32> %0, <4 x i32>* %val
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ldrq.w(i8*, i32) nounwind
+
+define void @llvm_mips_str_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_str_d_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    ld.d $w0, 0($4)
+; MIPS32R5-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-EB-NEXT:    swr $1, 19($5)
+; MIPS32R5-EB-NEXT:    swl $1, 16($5)
+; MIPS32R5-EB-NEXT:    swr $2, 23($5)
+; MIPS32R5-EB-NEXT:    swl $2, 20($5)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_str_d_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    ld.d $w0, 0($4)
+; MIPS32R5-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-EL-NEXT:    swr $1, 16($5)
+; MIPS32R5-EL-NEXT:    swl $1, 19($5)
+; MIPS32R5-EL-NEXT:    swr $2, 20($5)
+; MIPS32R5-EL-NEXT:    swl $2, 23($5)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_str_d_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    ld.d $w0, 0($4)
+; MIPS32R6-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R6-EB-NEXT:    sw $1, 20($5)
+; MIPS32R6-EB-NEXT:    sw $2, 16($5)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_str_d_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    ld.d $w0, 0($4)
+; MIPS32R6-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R6-EL-NEXT:    sw $1, 16($5)
+; MIPS32R6-EL-NEXT:    sw $2, 20($5)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_str_d_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld.d $w0, 0($4)
+; MIPS64R6-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R6-NEXT:    sd $1, 16($5)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %val
+  tail call void @llvm.mips.str.d(<2 x i64> %0, i8* %ptr, i32 16)
+  ret void
+}
+
+declare void @llvm.mips.str.d(<2 x i64>, i8*, i32) nounwind
+
+define void @llvm_mips_strq_w_test(<4 x i32>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_strq_w_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    ld.w $w0, 0($4)
+; MIPS32R5-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EB-NEXT:    swr $1, 19($5)
+; MIPS32R5-EB-NEXT:    swl $1, 16($5)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_strq_w_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    ld.w $w0, 0($4)
+; MIPS32R5-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EL-NEXT:    swr $1, 16($5)
+; MIPS32R5-EL-NEXT:    swl $1, 19($5)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_strq_w_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    ld.w $w0, 0($4)
+; MIPS32R6-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EB-NEXT:    sw $1, 16($5)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_strq_w_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    ld.w $w0, 0($4)
+; MIPS32R6-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EL-NEXT:    sw $1, 16($5)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_strq_w_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld.w $w0, 0($4)
+; MIPS64R6-NEXT:    copy_s.w $1, $w0[0]
+; MIPS64R6-NEXT:    sw $1, 16($5)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %val
+  tail call void @llvm.mips.strq.w(<4 x i32> %0, i8* %ptr, i32 16)
+  ret void
+}
+
+declare void @llvm.mips.strq.w(<4 x i32>, i8*, i32) nounwind
+
Index: llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -833,7 +833,9 @@
   }
 
   case ISD::INTRINSIC_W_CHAIN: {
-    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+    const unsigned IntrinsicOpcode =
+        cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntrinsicOpcode) {
     default:
       break;
 
@@ -845,6 +847,40 @@
       ReplaceNode(Node, Reg.getNode());
       return true;
     }
+    case Intrinsic::mips_ldr_d:
+    case Intrinsic::mips_ldrq_w: {
+      unsigned Op = (IntrinsicOpcode == Intrinsic::mips_ldr_d) ? Mips::LDR_D
+                                                               : Mips::LDRQ_W;
+
+      SDLoc DL(Node);
+      assert(Node->getNumOperands() == 4 && "Unexpected number of operands.");
+      const SDValue &Chain = Node->getOperand(0);
+      const SDValue &Intrinsic = Node->getOperand(1);
+      const SDValue &Pointer = Node->getOperand(2);
+      const SDValue &Constant = Node->getOperand(3);
+
+      assert(Chain.getValueType() == MVT::Other);
+      assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+             Constant.getOpcode() == ISD::Constant &&
+             "Invalid instruction operand.");
+
+      // Convert Constant to TargetConstant.
+      const ConstantInt *Val =
+          cast<ConstantSDNode>(Constant)->getConstantIntValue();
+      SDValue Imm =
+          CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+      SmallVector<SDValue, 3> Ops{Pointer, Imm, Chain};
+
+      assert(Node->getNumValues() == 2);
+      assert(Node->getValueType(0).is128BitVector());
+      assert(Node->getValueType(1) == MVT::Other);
+      SmallVector<EVT, 2> ResTys{Node->getValueType(0), Node->getValueType(1)};
+
+      ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+
+      return true;
+    }
     }
     break;
   }
@@ -866,7 +902,9 @@
   }
 
   case ISD::INTRINSIC_VOID: {
-    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+    const unsigned IntrinsicOpcode =
+        cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntrinsicOpcode) {
     default:
       break;
 
@@ -879,6 +917,39 @@
       ReplaceNode(Node, ChainOut.getNode());
       return true;
     }
+    case Intrinsic::mips_str_d:
+    case Intrinsic::mips_strq_w: {
+      unsigned Op = (IntrinsicOpcode == Intrinsic::mips_str_d) ? Mips::STR_D
+                                                               : Mips::STRQ_W;
+
+      SDLoc DL(Node);
+      assert(Node->getNumOperands() == 5 && "Unexpected number of operands.");
+      const SDValue &Chain = Node->getOperand(0);
+      const SDValue &Intrinsic = Node->getOperand(1);
+      const SDValue &Vec = Node->getOperand(2);
+      const SDValue &Pointer = Node->getOperand(3);
+      const SDValue &Constant = Node->getOperand(4);
+
+      assert(Chain.getValueType() == MVT::Other);
+      assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+             Constant.getOpcode() == ISD::Constant &&
+             "Invalid instruction operand.");
+
+      // Convert Constant to TargetConstant.
+      const ConstantInt *Val =
+          cast<ConstantSDNode>(Constant)->getConstantIntValue();
+      SDValue Imm =
+          CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+      SmallVector<SDValue, 4> Ops{Vec, Pointer, Imm, Chain};
+
+      assert(Node->getNumValues() == 1);
+      assert(Node->getValueType(0) == MVT::Other);
+      SmallVector<EVT, 1> ResTys{Node->getValueType(0)};
+
+      ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+      return true;
+    }
     }
     break;
   }
Index: llvm/lib/Target/Mips/MipsMSAInstrInfo.td
===================================================================
--- llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -2339,6 +2339,16 @@
 class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128WOpnd>;
 class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128DOpnd>;
 
+class MSA_LOAD_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+  PseudoSE<(outs RO:$dst), (ins PtrRC:$ptr, GPR32:$imm),
+           [(set RO:$dst, (intrinsic iPTR:$ptr, GPR32:$imm))]> {
+  let hasNoSchedulingInfo = 1;
+  let usesCustomInserter = 1;
+}
+
+def LDR_D : MSA_LOAD_PSEUDO_BASE<int_mips_ldr_d, MSA128DOpnd>;
+def LDRQ_W : MSA_LOAD_PSEUDO_BASE<int_mips_ldrq_w, MSA128WOpnd>;
+
 class LSA_DESC_BASE<string instr_asm, RegisterOperand RORD,
                     InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs RORD:$rd);
@@ -2671,6 +2681,16 @@
 class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd,
                                mem_simm10_lsl3, addrimm10lsl3>;
 
+class MSA_STORE_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+  PseudoSE<(outs), (ins RO:$dst, PtrRC:$ptr, GPR32:$imm),
+           [(intrinsic RO:$dst, iPTR:$ptr, GPR32:$imm)]> {
+  let hasNoSchedulingInfo = 1;
+  let usesCustomInserter = 1;
+}
+
+def STR_D : MSA_STORE_PSEUDO_BASE<int_mips_str_d, MSA128DOpnd>;
+def STRQ_W : MSA_STORE_PSEUDO_BASE<int_mips_strq_w, MSA128WOpnd>;
+
 class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
                                        MSA128BOpnd>;
 class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
Index: llvm/lib/Target/Mips/MipsISelLowering.h
===================================================================
--- llvm/lib/Target/Mips/MipsISelLowering.h
+++ llvm/lib/Target/Mips/MipsISelLowering.h
@@ -709,6 +709,12 @@
                                         bool isFPCmp, unsigned Opc) const;
     MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitLDRQ_W(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitLDR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitSTRQ_W(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitSTR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
   };
 
   /// Create MipsTargetLowering objects.
Index: llvm/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1451,6 +1451,14 @@
   case Mips::PseudoD_SELECT_I:
   case Mips::PseudoD_SELECT_I64:
     return emitPseudoD_SELECT(MI, BB);
+  case Mips::LDRQ_W:
+    return emitLDRQ_W(MI, BB);
+  case Mips::LDR_D:
+    return emitLDR_D(MI, BB);
+  case Mips::STRQ_W:
+    return emitSTRQ_W(MI, BB);
+  case Mips::STR_D:
+    return emitSTR_D(MI, BB);
   }
 }
 
@@ -4717,3 +4725,274 @@
   }
   report_fatal_error("Invalid register name global variable");
 }
+
+MachineBasicBlock *MipsTargetLowering::emitLDRQ_W(MachineInstr &MI,
+                                                  MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register Dest = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  unsigned Imm = MI.getOperand(2).getImm();
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from an address that is not naturally aligned.
+    Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::LW))
+        .addDef(Temp)
+        .addUse(Address)
+        .addImm(Imm);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(Temp);
+  } else {
+    // Mips release 5 needs to use instructions that can load from an unaligned
+    // memory address.
+    Register LoadHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoadFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Undef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(Undef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(LoadHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 3))
+        .addUse(Undef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(LoadFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 0))
+        .addUse(LoadHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(LoadFull);
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitLDR_D(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register Dest = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  unsigned Imm = MI.getOperand(2).getImm();
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from an address that is not naturally aligned.
+    if (Subtarget.isGP64bit()) {
+      Register Temp = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::LD))
+          .addDef(Temp)
+          .addUse(Address)
+          .addImm(Imm);
+      BuildMI(*BB, I, DL, TII->get(Mips::FILL_D)).addDef(Dest).addUse(Temp);
+    } else {
+      Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::LW))
+          .addDef(Lo)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 0 : 4));
+      BuildMI(*BB, I, DL, TII->get(Mips::LW))
+          .addDef(Hi)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 4 : 0));
+      BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(Lo);
+      BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+          .addUse(Wtemp)
+          .addUse(Hi)
+          .addImm(1);
+    }
+  } else {
+    // Mips release 5 needs to use instructions that can load from an unaligned
+    // memory address.
+    Register LoHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(LoUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(LoHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 7))
+        .addUse(LoUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(LoFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 4))
+        .addUse(LoHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(HiUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(HiHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 4 : 3))
+        .addUse(HiUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(HiFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 7 : 0))
+        .addUse(HiHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(LoFull);
+    BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+        .addUse(Wtemp)
+        .addUse(HiFull)
+        .addImm(1);
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitSTRQ_W(MachineInstr &MI,
+                                                  MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register StoreVal = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  unsigned Imm = MI.getOperand(2).getImm();
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to an address that is not naturally aligned.
+    Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(BitcastW).addUse(StoreVal);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Tmp)
+        .addUse(BitcastW)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::SW))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm);
+  } else {
+    // Mips release 5 needs to use instructions that can store to an unaligned
+    // memory address.
+    Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Tmp)
+        .addUse(StoreVal)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 3));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 0));
+  }
+
+  MI.eraseFromParent();
+
+  return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitSTR_D(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register StoreVal = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  unsigned Imm = MI.getOperand(2).getImm();
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to an address that is not naturally aligned.
+    if (Subtarget.isGP64bit()) {
+      Register BitcastD = MRI.createVirtualRegister(&Mips::MSA128DRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+          .addDef(BitcastD)
+          .addUse(StoreVal);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_D))
+          .addDef(Lo)
+          .addUse(BitcastD)
+          .addImm(0);
+      BuildMI(*BB, I, DL, TII->get(Mips::SD))
+          .addUse(Lo)
+          .addUse(Address)
+          .addImm(Imm);
+    } else {
+      Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+          .addDef(BitcastW)
+          .addUse(StoreVal);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+          .addDef(Lo)
+          .addUse(BitcastW)
+          .addImm(0);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+          .addDef(Hi)
+          .addUse(BitcastW)
+          .addImm(1);
+      BuildMI(*BB, I, DL, TII->get(Mips::SW))
+          .addUse(Lo)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 0 : 4));
+      BuildMI(*BB, I, DL, TII->get(Mips::SW))
+          .addUse(Hi)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 4 : 0));
+    }
+  } else {
+    // Mips release 5 needs to use instructions that can store to an unaligned
+    // memory address.
+    Register Bitcast = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(Bitcast).addUse(StoreVal);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Lo)
+        .addUse(Bitcast)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Hi)
+        .addUse(Bitcast)
+        .addImm(1);
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Lo)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 3));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Lo)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 0));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Hi)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 4 : 7));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Hi)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 7 : 4));
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
Index: llvm/include/llvm/IR/IntrinsicsMips.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsMips.td
+++ llvm/include/llvm/IR/IntrinsicsMips.td
@@ -1271,6 +1271,13 @@
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
   [IntrReadMem, IntrArgMemOnly]>;
 
+def int_mips_ldr_d : GCCBuiltin<"__builtin_msa_ldr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+def int_mips_ldrq_w : GCCBuiltin<"__builtin_msa_ldrq_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+
 def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">,
@@ -1695,6 +1702,13 @@
   Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
   [IntrArgMemOnly]>;
 
+def int_mips_str_d : GCCBuiltin<"__builtin_msa_str_d">,
+  Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+def int_mips_strq_w : GCCBuiltin<"__builtin_msa_strq_w">,
+  Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+
 def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 def int_mips_subs_s_h : GCCBuiltin<"__builtin_msa_subs_s_h">,
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2744,10 +2744,14 @@
   case Mips::BI__builtin_msa_ld_h: i = 1; l = -1024; u = 1022; m = 2; break;
   case Mips::BI__builtin_msa_ld_w: i = 1; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_ld_d: i = 1; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_ldr_d: i = 1; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_ldrq_w: i = 1; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_st_b: i = 2; l = -512; u = 511; m = 1; break;
   case Mips::BI__builtin_msa_st_h: i = 2; l = -1024; u = 1022; m = 2; break;
   case Mips::BI__builtin_msa_st_w: i = 2; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_st_d: i = 2; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_str_d: i = 2; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_strq_w: i = 2; l = -2048; u = 2044; m = 4; break;
   }
 
   if (!m)
Index: clang/lib/Headers/msa.h
===================================================================
--- clang/lib/Headers/msa.h
+++ clang/lib/Headers/msa.h
@@ -212,10 +212,14 @@
 #define __msa_ld_h __builtin_msa_ld_h
 #define __msa_ld_w __builtin_msa_ld_w
 #define __msa_ld_d __builtin_msa_ld_d
+#define __msa_ldr_d __builtin_msa_ldr_d
+#define __msa_ldrq_w __builtin_msa_ldrq_w
 #define __msa_st_b __builtin_msa_st_b
 #define __msa_st_h __builtin_msa_st_h
 #define __msa_st_w __builtin_msa_st_w
 #define __msa_st_d __builtin_msa_st_d
+#define __msa_str_d __builtin_msa_str_d
+#define __msa_strq_w __builtin_msa_strq_w
 #define __msa_sat_s_b __builtin_msa_sat_s_b
 #define __msa_sat_s_h __builtin_msa_sat_s_h
 #define __msa_sat_s_w __builtin_msa_sat_s_w
Index: clang/include/clang/Basic/BuiltinsMips.def
===================================================================
--- clang/include/clang/Basic/BuiltinsMips.def
+++ clang/include/clang/Basic/BuiltinsMips.def
@@ -635,6 +635,9 @@
 BUILTIN(__builtin_msa_ld_w, "V4Siv*Ii", "nc")
 BUILTIN(__builtin_msa_ld_d, "V2SLLiv*Ii", "nc")
 
+BUILTIN(__builtin_msa_ldr_d, "V2SLLiv*Ii", "nc")
+BUILTIN(__builtin_msa_ldrq_w, "V4Siv*Ii", "nc")
+
 BUILTIN(__builtin_msa_ldi_b, "V16cIi", "nc")
 BUILTIN(__builtin_msa_ldi_h, "V8sIi", "nc")
 BUILTIN(__builtin_msa_ldi_w, "V4iIi", "nc")
@@ -857,6 +860,9 @@
 BUILTIN(__builtin_msa_st_w, "vV4Siv*Ii", "nc")
 BUILTIN(__builtin_msa_st_d, "vV2SLLiv*Ii", "nc")
 
+BUILTIN(__builtin_msa_str_d, "vV2SLLiv*Ii", "nc")
+BUILTIN(__builtin_msa_strq_w, "vV4Siv*Ii", "nc")
+
 BUILTIN(__builtin_msa_subs_s_b, "V16ScV16ScV16Sc", "nc")
 BUILTIN(__builtin_msa_subs_s_h, "V8SsV8SsV8Ss", "nc")
 BUILTIN(__builtin_msa_subs_s_w, "V4SiV4SiV4Si", "nc")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to