[PATCH] D83955: [PowerPC][Power10] Implementation of 128-bit Binary Vector Multiply builtins

2020-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added subscribers: steven.zhang, kbarton.

This patch implements 128-bit Binary Vector Multiply builtins for PowerPC10.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83955

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; This test case aims to test the vector multiply instructions on Power10.
+
+declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuleud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuleud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuloud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuloud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulesd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulesd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulosd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulosd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
+; CHECK-LABEL: test_vmsumcud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmsumcud v2, v2, v3, v4
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
+  ret <1 x i128> %tmp
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -977,20 +977,31 @@
   }
 
   def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+ v2i64:$vB))]>;
 
   def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+ v2i64:$vB))]>;
 
   def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+ v2i64:$vB))]>;
 
   def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+ v2i64:$vB))]>;
 
   def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
(ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
-   "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
+   "vmsumcud $vD, $vA, $vB, $vC", 

[PATCH] D84197: [PowerPC][Power10] 128-bit Vector String Isolate instruction definitions and MC Tests

2020-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added a subscriber: kbarton.

This implements vector string isolate instructions used in vector string 
isolate builtins.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84197

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,27 +405,24 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
-# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
-# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
-lxvrbx 32, 1, 2
-# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
-# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
-lxvrhx 33, 1, 2
-# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
-# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
-lxvrdx 34, 1, 2
-# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
-# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
-lxvrwx 35, 1, 2
-# CHECK-BE: stxvrbx 32, 3, 1  # encoding: [0x7c,0x03,0x09,0x1b]
-# CHECK-LE: stxvrbx 32, 3, 1  # encoding: [0x1b,0x09,0x03,0x7c]
-stxvrbx 32, 3, 1
-# CHECK-BE: stxvrhx 33, 3, 1  # encoding: [0x7c,0x23,0x09,0x5b]
-# CHECK-LE: stxvrhx 33, 3, 1  # encoding: [0x5b,0x09,0x23,0x7c]
-stxvrhx 33, 3, 1
-# CHECK-BE: stxvrwx 34, 3, 1  # encoding: [0x7c,0x43,0x09,0x9b]
-# CHECK-LE: stxvrwx 34, 3, 1  # encoding: [0x9b,0x09,0x43,0x7c]
-stxvrwx 34, 3, 1
-# CHECK-BE: stxvrdx 35, 3, 1  # encoding: [0x7c,0x63,0x09,0xdb]
-# CHECK-LE: stxvrdx 35, 3, 1  # encoding: [0xdb,0x09,0x63,0x7c]
-stxvrdx 35, 3, 1
+# CHECK-BE: vstribr 2, 2  # encoding: [0x10,0x41,0x10,0x0d]
+# CHECK-LE: vstribr 2, 2  # encoding: [0x0d,0x10,0x41,0x10]
+vstribr 2, 2
+# CHECK-BE: vstribl 2, 2  # encoding: [0x10,0x40,0x10,0x0d]
+# CHECK-LE: vstribl 2, 2  # encoding: [0x0d,0x10,0x40,0x10]
+vstribl 2, 2
+# CHECK-BE: vstrihr 2, 2  # encoding: [0x10,0x43,0x10,0x0d]
+# CHECK-LE: vstrihr 2, 2  # encoding: [0x0d,0x10,0x43,0x10]
+vstrihr 2, 2
+# CHECK-BE: vstribr. 2, 2 # encoding: [0x10,0x41,0x14,0x0d]
+# CHECK-LE: vstribr. 2, 2 # encoding: [0x0d,0x14,0x41,0x10]
+vstribr. 2, 2
+# CHECK-BE: vstribl. 2, 2 # encoding: [0x10,0x40,0x14,0x0d]
+# CHECK-LE: vstribl. 2, 2 # encoding: [0x0d,0x14,0x40,0x10]
+vstribl. 2, 2
+# CHECK-BE: vstrihr. 2, 2 # encoding: [0x10,0x43,0x14,0x0d]
+# CHECK-LE: vstrihr. 2, 2 # encoding: [0x0d,0x14,0x43,0x10]
+vstrihr. 2, 2
+# CHECK-BE: vstrihl. 2, 2 # encoding: [0x10,0x42,0x14,0x0d]
+# CHECK-LE: vstrihl. 2, 2 # encoding: [0x0d,0x14,0x42,0x10]
+vstrihl. 2, 2
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -279,26 +279,26 @@
 # CHECK: vinsdrx 1, 2, 3
 0x10 0x22 0x1b 0xcf
 
-# CHECK: lxvrbx 32, 1, 2
-0x7c 0x01 0x10 0x1b
+# CHECK: vstribr 2, 2
+0x10 0x41 0x10 0x0d
 
-# CHECK: lxvrhx 33, 1, 2
-0x7c 0x21 0x10 0x5b
+# CHECK: vstribl 2, 2
+0x10 0x40 0x10 0x0d
 
-# CHECK: lxvrdx 34, 1, 2
-0x7c 0x41 0x10 0xdb
+# CHECK: vstrihr 2, 2
+0x10 0x43 0x10 0x0d
 
-# CHECK: lxvrwx 35, 1, 2
-0x7c 0x61 0x10 0x9b
+# CHECK: vstrihl 2, 2
+0x10 0x42 0x10 0x0d
 
-# CHECK: stxvrbx 32, 3, 1
-0x7c 0x03 0x09 0x1b
+# CHECK: vstribr. 2, 2
+0x10 0x41 0x14 0x0d
 
-# CHECK: stxvrhx 33, 3, 1
-0x7c 0x23 0x09 0x5b
+# CHECK: vstribl. 2, 2
+0x10 0x40 0x14 0x0d
 
-# CHECK: stxvrwx 34, 3, 1
-0x7c 0x43 0x09 0x9b
+# CHECK: vstrihr. 2, 2
+0x10 0x43 0x14 0x0d
 
-# CHECK: stxvrdx 35, 3, 1
-0x7c 0x63 0x09 0xdb
+# CHECK: vstrihl. 2, 2
+0x10 0x42 0x14 0x0d
Index: llv

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 279348.
Conanap marked 8 inline comments as done.
Conanap added a comment.

Fixed function names, test case clean up


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These tests ensure that vector shift quadword builtins are correctly
+; exploited and selected for during codeGen.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1032,9 +1032,9 @@
 (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
 "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
 RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-  def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
-  def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
-  def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
+  def VSLQ  : VX1_Int_Ty<261, "vslq", int_ppc_altivec_vslq, v1i128>;
+  def VSRAQ : VX1_Int_Ty<773, "vsraq", int_ppc_altivec_vsraq, v1i128>;
+  def VSRQ  : VX1_Int_Ty<517, "vsrq", int_ppc_altivec_vsrq, v1i128>;
   def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -797,6 +797,7 @@
 def int_ppc_altivec_vsrv  : PowerPC_Vec_BBB_Intrinsic<"vsrv">;
 def int_ppc_altivec_vslh  : PowerPC_Vec_HHH_Intrinsic<"vslh">;
 def int_ppc_altivec_vslw  : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+def int_ppc_altivec_vslq  : PowerPC_Vec_QQQ_Intrinsic<"vslq">;
 
 // Right Shifts.
 def int_ppc_altivec_vsr   : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@@ -805,9 +806,11 @@
 def int_ppc_altivec_vsrb  : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
 def int_ppc_altivec_vsrh  : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
 def int_ppc_altivec_vsrw  : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrq  : PowerPC_Vec_QQQ_Intrinsic<"vsrq">;
 def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
 def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
 def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+def int_ppc_altivec_vsraq : PowerPC_Vec_QQQ_Intrinsic<"vsraq">;
 
 // Rotates.
 def int_ppc_altivec_vrlb  : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -21,6 +21,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed lon

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 279362.
Conanap marked 5 inline comments as done.
Conanap added a comment.

Return signature fix, added recognition for ISD:EXTLOAD, some code clean up.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,4 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN:   FileCheck %s
@@ -33,3 +36,61 @@
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i1 0)
   ret i32 %0
 }
+
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+   [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
  InstrItinClass itin> : Instruction {
@@ -996,6 +1005,15 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+ (v1i128 (COPY_TO_REGCLA

[PATCH] D83955: [PowerPC][Power10] Implementation of 128-bit Binary Vector Multiply builtins

2020-07-21 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 279637.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Changed if def to use pwer10_vector


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83955/new/

https://reviews.llvm.org/D83955

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; This test case aims to test the vector multiply instructions on Power10.
+
+declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuleud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuleud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuloud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuloud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulesd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulesd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulosd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulosd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
+; CHECK-LABEL: test_vmsumcud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmsumcud v2, v2, v3, v4
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
+  ret <1 x i128> %tmp
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -993,16 +993,26 @@
   }
 
   def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
-  def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
-   (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
-   "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
+ "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
+   v2i64:$vB))]>;
+  def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+  

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 280592.
Conanap removed a reviewer: power-llvm-team.
Conanap added a comment.
Herald added a subscriber: kbarton.

Converted the impelmentation to an open coded implementation and updated the 
test cases as appropriate.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These tests ensure that vector shift quadword builtins are correctly
+; exploited and selected for during codeGen.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shl = shl <1 x i128> %a, %rem
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = lshr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = ashr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1100,6 +1100,22 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
 }
 
+/* Vector shifts for ISA3_1 */
+let Predicates = [IsISA3_1] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+}
+
 let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
  def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
 i32immNonAllOneNonZero:$A,
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1089,6 +1089,10 @@
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 }
+
+if(Subtarget.isISA3_1()) {
+  setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+}
   }
 
   if (Subtarget.hasQPX()) {
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -21,6 +21,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vi128a;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -593,3 +594,45 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector unsigned __int128 test_vec_slq_unsigned (void) {
+  // CHECK-LABEL: test_vec_slq_unsigned
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128> %{{.+}}
+  return vec_sl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_slq_signed (void) {
+  // CHECK-LABEL: test_vec_slq_signed
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+ 

[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 280597.
Conanap marked 2 inline comments as done.
Conanap retitled this revision from "[PowerPC][Power10]  Implement Load VSX 
Vector and Sign Extend and Zero Extend" to "[PowerPC] Implement Load VSX Vector 
and Sign Extend and Zero Extend".
Conanap edited the summary of this revision.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -36,3 +36,61 @@
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
   ret i32 %0
 }
+
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+   [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
  InstrItinClass itin> : Instruction {
@@ -1098,6 +1107,15 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
 }
 
 let AddedComplexity = 400, Predicates = [PrefixInstrs] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -493,6 +493,12 @@
 /// an x

[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

Addressed formatting comments




Comment at: llvm/lib/Target/PowerPC/PPCISelLowering.cpp:14156
+
+  // This transformation is only valid if the we are loading either a byte,
+  // halfword, word, or doubleword.

NeHuang wrote:
> nit: if we are loading either a byte
I'm not too sure what you mean, would you be able to elaborate?  
The comment is:


> This transformation is only valid if the we are loading either a byte,
> halfword, word, or doubleword.

Thanks!




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-27 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 281035.
Conanap added a comment.

Included sext test case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
+; RUN:   FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefix=CHECK-BE
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -12,27 +12,224 @@
 declare i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8>, i32)
 
 define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:xvtlsbb cr0, v2
-; CHECK-NEXT:mfocrf r3, 128
-; CHECK-NEXT:srwi r3, r3, 31
-; CHECK-NEXT:extsw r3, r3
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xvtlsbb cr0, v2
+; CHECK-LE-NEXT:mfocrf r3, 128
+; CHECK-LE-NEXT:srwi r3, r3, 31
+; CHECK-LE-NEXT:extsw r3, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xvtlsbb cr0, v2
+; CHECK-BE-NEXT:mfocrf r3, 128
+; CHECK-BE-NEXT:srwi r3, r3, 31
+; CHECK-BE-NEXT:extsw r3, r3
+; CHECK-BE-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 1)
   ret i32 %0
 }
 
 define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:xvtlsbb cr0, v2
-; CHECK-NEXT:mfocrf r3, 128
-; CHECK-NEXT:rlwinm r3, r3, 3, 31, 31
-; CHECK-NEXT:extsw r3, r3
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xvtlsbb cr0, v2
+; CHECK-LE-NEXT:mfocrf r3, 128
+; CHECK-LE-NEXT:rlwinm r3, r3, 3, 31, 31
+; CHECK-LE-NEXT:extsw r3, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xvtlsbb cr0, v2
+; CHECK-BE-NEXT:mfocrf r3, 128
+; CHECK-BE-NEXT:rlwinm r3, r3, 3, 31, 31
+; CHECK-BE-NEXT:extsw r3, r3
+; CHECK-BE-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i32 0)
   ret i32 %0
 }
+
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:lxvrbx v2, r4, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: vec_xl_zext:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:lxvrbx vs0, r4, r3
+; CHECK-BE-NEXT:xxlor v2, vs0, vs0
+; CHECK-BE-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_short:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:sldi r3, r3, 1
+; CHECK-LE-NEXT:lxvrhx v2, r4, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_short:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:sldi r3, r3, 1
+; CHECK-BE-NEXT:lxvrhx vs0, r4, r3
+; CHECK-BE-NEXT:xxlor v2, vs0, vs0
+; CHECK-BE-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_w

[PATCH] D84197: [PowerPC][Power10] 128-bit Vector String Isolate instruction definitions and MC Tests

2020-07-27 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 281042.
Conanap added a comment.

Recovered accidentally deleted test cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84197/new/

https://reviews.llvm.org/D84197

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -585,3 +585,24 @@
 # CHECK-BE: xscvsqqp 8, 28# encoding: [0xfd,0x0b,0xe6,0x88]
 # CHECK-LE: xscvsqqp 8, 28# encoding: [0x88,0xe6,0x0b,0xfd]
 xscvsqqp 8, 28
+# CHECK-BE: vstribr 2, 2  # encoding: [0x10,0x41,0x10,0x0d]
+# CHECK-LE: vstribr 2, 2  # encoding: [0x0d,0x10,0x41,0x10]
+vstribr 2, 2
+# CHECK-BE: vstribl 2, 2  # encoding: [0x10,0x40,0x10,0x0d]
+# CHECK-LE: vstribl 2, 2  # encoding: [0x0d,0x10,0x40,0x10]
+vstribl 2, 2
+# CHECK-BE: vstrihr 2, 2  # encoding: [0x10,0x43,0x10,0x0d]
+# CHECK-LE: vstrihr 2, 2  # encoding: [0x0d,0x10,0x43,0x10]
+vstrihr 2, 2
+# CHECK-BE: vstribr. 2, 2 # encoding: [0x10,0x41,0x14,0x0d]
+# CHECK-LE: vstribr. 2, 2 # encoding: [0x0d,0x14,0x41,0x10]
+vstribr. 2, 2
+# CHECK-BE: vstribl. 2, 2 # encoding: [0x10,0x40,0x14,0x0d]
+# CHECK-LE: vstribl. 2, 2 # encoding: [0x0d,0x14,0x40,0x10]
+vstribl. 2, 2
+# CHECK-BE: vstrihr. 2, 2 # encoding: [0x10,0x43,0x14,0x0d]
+# CHECK-LE: vstrihr. 2, 2 # encoding: [0x0d,0x14,0x43,0x10]
+vstrihr. 2, 2
+# CHECK-BE: vstrihl. 2, 2 # encoding: [0x10,0x42,0x14,0x0d]
+# CHECK-LE: vstrihl. 2, 2 # encoding: [0x0d,0x14,0x42,0x10]
+vstrihl. 2, 2
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -459,3 +459,26 @@
 # CHECK: xscvsqqp 8, 28
 0xfd 0xb 0xe6 0x88
 
+# CHECK: vstribr 2, 2
+0x10 0x41 0x10 0x0d
+
+# CHECK: vstribl 2, 2
+0x10 0x40 0x10 0x0d
+
+# CHECK: vstrihr 2, 2
+0x10 0x43 0x10 0x0d
+
+# CHECK: vstrihl 2, 2
+0x10 0x42 0x10 0x0d
+
+# CHECK: vstribr. 2, 2
+0x10 0x41 0x14 0x0d
+
+# CHECK: vstribl. 2, 2
+0x10 0x40 0x14 0x0d
+
+# CHECK: vstrihr. 2, 2
+0x10 0x43 0x14 0x0d
+
+# CHECK: vstrihl. 2, 2
+0x10 0x42 0x14 0x0d
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -59,6 +59,39 @@
   string BaseName = "";
 }
 
+// VX-Form: [ PO VT R VB RC XO ]
+class VXForm_VTB5_RC xo, bits<5> R, dag OOL, dag IOL, string asmstr,
+  InstrItinClass itin, list pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VT;
+  bits<5> VB;
+  bit RC = 0;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VT;
+  let Inst{11-15} = R;
+  let Inst{16-20} = VB;
+  let Inst{21} = RC;
+  let Inst{22-31} = xo;
+}
+
+// Multiclass definition to account for record and non-record form
+// instructions of VXRForm.
+multiclass VXForm_VTB5_RCr xo, bits<5> R, dag OOL, dag IOL,
+string asmbase, string asmstr,
+InstrItinClass itin, list pattern> {
+  let BaseName = asmbase in {
+def NAME : VXForm_VTB5_RC, RecFormRel;
+let Defs = [CR6] in
+def _rec : VXForm_VTB5_RC, isRecordForm, RecFormRel;
+  }
+}
+
 class MLS_DForm_R_SI34_RTA5_MEM opcode, dag OOL, dag IOL, string asmstr,
 InstrItinClass itin, list pattern>
   : PI<1, opcode, OOL, IOL, asmstr, itin> {
@@ -821,6 +854,14 @@
   (int_ppc_altivec_vsrdbi v16i8:$VRA,
   v16i8:$VRB, 
   i32:$SH))]>;
+  defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribr", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstribl", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB),
+ "vstrihr", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB),
+   

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-29 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 281735.
Conanap added a comment.

Fixed formatting based on bot feedback.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1137,6 +1137,22 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
 }
 
+/* Vector shifts for ISA3_1 */
+let Predicates = [IsISA3_1] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+}
+
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
 (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1096,6 +1096,10 @@
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 }
+
+if (Subtarget.isISA3_1()) {
+  setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+}
   }
 
   if (Subtarget.has64BitSupport())
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
++vector signed __int128 vi128a;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -634,3 +635,45 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector unsigned __int128 test_vec_slq_unsigned (void) {
+  // CHECK-LABEL: test_vec_slq_unsigned
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128> %{{.+}}
+  return vec_sl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_slq_signed (void) {
+  // CHECK-LABEL: test_vec_slq_signed
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sl(vi128a, vui128a);
+}
+
+vector unsigned __int128 test_vec_srq_unsigned (void) {
+  // CHECK-LABEL: test_vec_srq_unsigned
+  // CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sr(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_srq_signed (void) {
+  // CHECK-LABEL: test_vec_srq_signed
+  // CHECK: lshr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sr(vi128a, vui128a);
+}
+
+vector unsigned __int128 test_vec_sraq_unsigned (void) {
+  // CHECK-LABEL: test_vec_sraq_unsigned
+  // CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sra(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_sraq_signed (void) {
+  // CHECK-LABEL: test_vec_sraq_signed
+  // CHECK: ashr <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sra(vi128a, vui128a);
+}
Index: clang/lib/Headers/altivec.h
===
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17213,6 +17213,56 @@
   return __builtin_vsx_xvtlsbb(__a, 0);
 }
 #endif /* __VSX__ */
+
+/* vs[l | r | ra] */
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a << (__b %
+ (vector unsigned __int128)(sizeof(unsigned __int128) *
+   __CHAR_BIT__));
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_sl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  // return (vector signed __int128)vec_sl((vector unsigned __int128)__a, __b);
+  return __a << (__b %
+ (vector unsigned __int128)(sizeof(unsigned __int128) *
+   __CHAR_BIT__));

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-29 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 281783.
Conanap added a comment.

Replaced a forgotten test file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These tests ensure that vector shift quadword builtins are correctly
+; exploited and selected for during codeGen.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shl = shl <1 x i128> %a, %rem
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = lshr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = ashr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1137,6 +1137,22 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
 }
 
+/* Vector shifts for ISA3_1 */
+let Predicates = [IsISA3_1] in {
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+}
+
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
 (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1096,6 +1096,10 @@
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 }
+
+if (Subtarget.isISA3_1()) {
+  setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+}
   }
 
   if (Subtarget.has64BitSupport())
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
++vector signed __int128 vi128a;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -634,3 +635,45 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector unsigned __int128 test_vec_slq_unsigned (void) {
+  // CHECK-LABEL: test_vec_slq_unsigned
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128> %{{.+}}
+  return vec_sl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_slq_signed (void) {
+  // CHECK-LABEL: test_vec_slq_signed
+  // CHECK: shl <1 x i128> %{{.+}}, %{{.+}}
+  // CHECK: ret <1 x i128>
+  return vec_sl(vi128a, vui128a);
+}
+
+vector unsigned __int128 test_vec_srq_unsigned (void) {
+  // CHECK-LABEL: test_vec_s

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 2 inline comments as done.
Conanap added a comment.

Addressed Anil's comments with regards to the test cases




Comment at: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll:56
+
+; CHECK: lxvrdx
+; Function Attrs: norecurse nounwind readonly

anil9 wrote:
> I am not too familiar with the builtins but I never saw a check outside of 
> the two braces in the test cases before,  is it not posible to include it 
> inside the test cases ?
these were extra checks that I should've removed; thanks for catching it


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

Addressed Lei's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 275124.
Conanap marked 4 inline comments as done.
Conanap added a comment.

Fixed some formatting stuff.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i6

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 275159.
Conanap added a comment.

Fixed a missing comma; ensured it builds.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__of

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-02 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked an inline comment as done.
Conanap added a comment.

fixed a missing comma and ensure that it compiles


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-03 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 275423.
Conanap marked 3 inline comments as done.
Conanap added a comment.

Moved some code to the top of the file as per Lei's request


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
Index: llvm/lib/Target/Po

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-03 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 2 inline comments as done.
Conanap added a comment.

Addressed Lei's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-07 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, power-llvm-team, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.

Implemented the following vector right and left shift builtins and its test 
cases:

  vector unsigned __int128 vec_sl(vector unsigned __int128 a, vector unsigned 
__int128 b)
  vector signed __int128 vec_sl(vector signed __int128 a, vector unsigned 
__int128 b)
  vector unsigned __int128 vec_sr(vector unsigned __int128 a, vector unsigned 
__int128 b)
  vector signed __int128 vec_sr(vector signed __int128 a, vector unsigned 
__int128 b)
  vector unsigned __int128 vec_sra(vector unsigned __int128 a, vector unsigned 
__int128 b)
  vector signed __int128 vec_sra(vector signed __int128 a, vector unsigned 
__int128 b)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83338

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,12 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: vslq 2, 3, 4  # encoding: [0x10,0x43,0x21,0x05]
+# CHECK-LE: vslq 2, 3, 4  # encoding: [0x05,0x21,0x43,0x10]
+vslq 2, 3, 4
+# CHECK-BE: vsraq 2, 3, 4 # encoding: [0x10,0x43,0x23,0x05]
+# CHECK-LE: vsraq 2, 3, 4 # encoding: [0x05,0x23,0x43,0x10]
+vsraq 2, 3, 4
+# CHECK-BE: vsrq 2, 3, 4  # encoding: [0x10,0x43,0x22,0x05]
+# CHECK-LE: vsrq 2, 3, 4  # encoding: [0x05,0x22,0x43,0x10]
+vsrq 2, 3, 4
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -278,3 +278,12 @@
 
 # CHECK: vinsdrx 1, 2, 3
 0x10 0x22 0x1b 0xcf
+
+# CHECK: vsrq 2, 3, 4
+0x10 0x43 0x22 0x05
+
+# CHECK: vslq 2, 3, 4
+0x10 0x43 0x21 0x05
+
+# CHECK: vsraq 2, 3, 4
+0x10 0x43 0x23 0x05
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -915,6 +915,11 @@
  "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
  [(set v16i8:$vD,
(int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
+
+  def VSLQ   : VX1_Int_Ty< 261, "vslq", int_ppc_altivec_vslq, v1i128>;
+  def VSRAQ  : VX1_Int_Ty< 773, "vsraq", int_ppc_altivec_vsraq, v1i128>;
+  def VSRQ   : VX1_Int_Ty< 517, "vsrq" , int_ppc_altivec_vsrq, v1i128>;
+
 }
 
 // Anonymous Patterns //
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -788,6 +788,7 @@
 def int_ppc_altivec_vsrv  : PowerPC_Vec_BBB_Intrinsic<"vsrv">;
 def int_ppc_altivec_vslh  : PowerPC_Vec_HHH_Intrinsic<"vslh">;
 def int_ppc_altivec_vslw  : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+def int_ppc_altivec_vslq  : PowerPC_Vec_QQQ_Intrinsic<"vslq">;
 
 // Right Shifts.
 def int_ppc_altivec_vsr   : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@@ -796,9 +797,11 @@
 def int_ppc_altivec_vsrb  : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
 def int_ppc_altivec_vsrh  : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
 def int_ppc_altivec_vsrw  : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrq   : PowerPC_Vec_QQQ_Intrinsic<"vsrq">;
 def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
 def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
 def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+def int_ppc_altivec_vsraq : PowerPC_Vec_QQQ_Intrinsic<"vsraq">;
 
 // Rotates.
 def int_ppc_altivec_vrlb  : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
Index: clang/lib/Headers/altivec.h
===
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17095,6 +17095,37 @@
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 #endif /* __VSX__ */
+
+/* vector shifts for quadwords */
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vsl(_

[PATCH] D83364: [PowerPC][Power10] Implement Instruction definition and MC Tests for Load and Store VSX Vector with Zero or Sign Extend

2020-07-07 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: power-llvm-team, PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.

Includes instruction defintion and MC Tests for above instructions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83364

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s


Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,27 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: 
[0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: 
[0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: 
[0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: 
[0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: 
[0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: 
[0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: 
[0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: 
[0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: 
[0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: 
[0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
+# CHECK-BE: stxvrbx 32, 3, 1  # encoding: 
[0x7c,0x03,0x09,0x1b]
+# CHECK-LE: stxvrbx 32, 3, 1  # encoding: 
[0x1b,0x09,0x03,0x7c]
+stxvrbx 32, 3, 1
+# CHECK-BE: stxvrhx 33, 3, 1  # encoding: 
[0x7c,0x23,0x09,0x5b]
+# CHECK-LE: stxvrhx 33, 3, 1  # encoding: 
[0x5b,0x09,0x23,0x7c]
+stxvrhx 33, 3, 1
+# CHECK-BE: stxvrwx 34, 3, 1  # encoding: 
[0x7c,0x43,0x09,0x9b]
+# CHECK-LE: stxvrwx 34, 3, 1  # encoding: 
[0x9b,0x09,0x43,0x7c]
+stxvrwx 34, 3, 1
+# CHECK-BE: stxvrdx 35, 3, 1  # encoding: 
[0x7c,0x63,0x09,0xdb]
+# CHECK-LE: stxvrdx 35, 3, 1  # encoding: 
[0xdb,0x09,0x63,0x7c]
+stxvrdx 35, 3, 1
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -278,3 +278,27 @@
 
 # CHECK: vinsdrx 1, 2, 3
 0x10 0x22 0x1b 0xcf
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
+
+# CHECK: stxvrbx 32, 3, 1
+0x7c 0x03 0x09 0x1b
+
+# CHECK: stxvrhx 33, 3, 1
+0x7c 0x23 0x09 0x5b
+
+# CHECK: stxvrwx 34, 3, 1
+0x7c 0x43 0x09 0x9b
+
+# CHECK: stxvrdx 35, 3, 1
+0x7c 0x63 0x09 0xdb
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -428,6 +428,22 @@
 def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
 def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
 
+let mayLoad = 1, mayStore = 0, Predicates = [IsISA3_1] in {
+  // The XFormMemOp flag is set on the instruction format.
+  def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;
+  def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>;
+  def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>;
+  def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>;
+}
+
+let mayLoad = 0, mayStore = 1, Predicates = [IsISA3_1] in {
+  // The XFormMemOp flag is set on the instruction format.
+  def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>;
+  def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>;
+  def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
+  def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
+}
+
 let Predicates = [PrefixInstrs] in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
 defm PADDI8 :


Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,27 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2

[PATCH] D83364: [PowerPC][Power10] Implement Instruction definition and MC Tests for Load and Store VSX Vector with Zero or Sign Extend

2020-07-08 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 276564.
Conanap added a comment.

Relocated some of the instructions to a more appropriate place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83364/new/

https://reviews.llvm.org/D83364

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s


Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,27 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: 
[0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: 
[0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: 
[0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: 
[0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: 
[0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: 
[0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: 
[0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: 
[0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: 
[0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: 
[0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
+# CHECK-BE: stxvrbx 32, 3, 1  # encoding: 
[0x7c,0x03,0x09,0x1b]
+# CHECK-LE: stxvrbx 32, 3, 1  # encoding: 
[0x1b,0x09,0x03,0x7c]
+stxvrbx 32, 3, 1
+# CHECK-BE: stxvrhx 33, 3, 1  # encoding: 
[0x7c,0x23,0x09,0x5b]
+# CHECK-LE: stxvrhx 33, 3, 1  # encoding: 
[0x5b,0x09,0x23,0x7c]
+stxvrhx 33, 3, 1
+# CHECK-BE: stxvrwx 34, 3, 1  # encoding: 
[0x7c,0x43,0x09,0x9b]
+# CHECK-LE: stxvrwx 34, 3, 1  # encoding: 
[0x9b,0x09,0x43,0x7c]
+stxvrwx 34, 3, 1
+# CHECK-BE: stxvrdx 35, 3, 1  # encoding: 
[0x7c,0x63,0x09,0xdb]
+# CHECK-LE: stxvrdx 35, 3, 1  # encoding: 
[0xdb,0x09,0x63,0x7c]
+stxvrdx 35, 3, 1
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -278,3 +278,27 @@
 
 # CHECK: vinsdrx 1, 2, 3
 0x10 0x22 0x1b 0xcf
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
+
+# CHECK: stxvrbx 32, 3, 1
+0x7c 0x03 0x09 0x1b
+
+# CHECK: stxvrhx 33, 3, 1
+0x7c 0x23 0x09 0x5b
+
+# CHECK: stxvrwx 34, 3, 1
+0x7c 0x43 0x09 0x9b
+
+# CHECK: stxvrdx 35, 3, 1
+0x7c 0x63 0x09 0xdb
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -934,8 +934,25 @@
  "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
  [(set v16i8:$vD,
(int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
+
+  // The XFormMemOp flag for the following 8 insts is set on the instruction 
format.
+  let mayLoad = 1, mayStore = 1 in {
+def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;
+def LXVRHX : X_XT6_RA5_RB5<31, 45, "lxvrhx", vsrc, []>;
+def LXVRWX : X_XT6_RA5_RB5<31, 77, "lxvrwx", vsrc, []>;
+def LXVRDX : X_XT6_RA5_RB5<31, 109, "lxvrdx", vsrc, []>;
+  }
+
+  let mayLoad = 0, mayStore = 1 in {
+def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>;
+def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>;
+def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>;
+def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
+  }
 }
 
+
+
 // Anonymous Patterns 
//
 let Predicates = [IsISA3_1] in {
   def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),


Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,27 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encodin

[PATCH] D83364: [PowerPC][Power10] Implement Instruction definition and MC Tests for Load and Store VSX Vector with Zero or Sign Extend

2020-07-08 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 2 inline comments as done.
Conanap added a comment.

Relocated the isntr definitions to a more appropriate place.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83364/new/

https://reviews.llvm.org/D83364



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-09 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

Also removed unnecessary brackets and comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-09 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 276785.
Conanap marked 3 inline comments as done.
Conanap added a comment.

Now depends on D83364 .


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -429,3 +429,15 @@
 # CHECK-BE: stxvrdx 35, 3, 1  # encoding: [0x7c,0x63,0x09,0xdb]
 # CHECK-LE: stxvrdx 35, 3, 1  # encoding: [0xdb,0x09,0x63,0x7c]
 stxvrdx 35, 3, 1
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -302,3 +302,15 @@
 
 # CHECK: stxvrdx 35, 3, 1
 0x7c 0x63 0x09 0xdb
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:

[PATCH] D83364: [PowerPC][Power10] Implement Instruction definition and MC Tests for Load and Store VSX Vector with Zero or Sign Extend

2020-07-09 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked an inline comment as done.
Conanap added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCInstrPrefix.td:939
+  // The XFormMemOp flag for the following 8 insts is set on the instruction 
format.
+  let mayLoad = 1, mayStore = 1 in {
+def LXVRBX : X_XT6_RA5_RB5<31, 13, "lxvrbx", vsrc, []>;

bsaleil wrote:
> Shouldn't `mayStore` be 0 instead of 1 here ?
yes, thanks; will fix on the commit


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83364/new/

https://reviews.llvm.org/D83364



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83516: [PowerPC][Power10] RFC 2608 Instruction definitions and MC Tests

2020-07-09 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: power-llvm-team, PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.

This implements instruction definitions and MC tests for RFC2608. Please note 
that some instrs have classes that will need to be changed later as their 
classes have not been implemented yet - they will be implemented in their 
respective patches.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83516

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,84 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: vmulesd 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xc8]
+# CHECK-LE: vmulesd 1, 2, 3   # encoding: [0xc8,0x1b,0x22,0x10]
+vmulesd 1, 2, 3
+# CHECK-BE: vmulosd 1, 2, 3   # encoding: [0x10,0x22,0x19,0xc8]
+# CHECK-LE: vmulosd 1, 2, 3   # encoding: [0xc8,0x19,0x22,0x10]
+vmulosd 1, 2, 3
+# CHECK-BE: vmuleud 1, 2, 3   # encoding: [0x10,0x22,0x1a,0xc8]
+# CHECK-LE: vmuleud 1, 2, 3   # encoding: [0xc8,0x1a,0x22,0x10]
+vmuleud 1, 2, 3
+# CHECK-BE: vmuloud 1, 2, 3   # encoding: [0x10,0x22,0x18,0xc8]
+# CHECK-LE: vmuloud 1, 2, 3   # encoding: [0xc8,0x18,0x22,0x10]
+vmuloud 1, 2, 3
+# CHECK-BE: vmsumcud 1, 2, 3, 4   # encoding: [0x10,0x22,0x19,0x17]
+# CHECK-LE: vmsumcud 1, 2, 3, 4   # encoding: [0x17,0x19,0x22,0x10]
+vmsumcud 1, 2, 3, 4
+# CHECK-BE: vdivsq 3, 4, 5# encoding: [0x10,0x64,0x29,0x0b]
+# CHECK-LE: vdivsq 3, 4, 5# encoding: [0x0b,0x29,0x64,0x10]
+vdivsq 3, 4, 5
+# CHECK-BE: vdivuq 3, 4, 5# encoding: [0x10,0x64,0x28,0x0b]
+# CHECK-LE: vdivuq 3, 4, 5# encoding: [0x0b,0x28,0x64,0x10]
+vdivuq 3, 4, 5
+# CHECK-BE: vdivesq 3, 4, 5   # encoding: [0x10,0x64,0x2b,0x0b]
+# CHECK-LE: vdivesq 3, 4, 5   # encoding: [0x0b,0x2b,0x64,0x10]
+vdivesq 3, 4, 5
+# CHECK-BE: vdiveuq 3, 4, 5   # encoding: [0x10,0x64,0x2a,0x0b]
+# CHECK-LE: vdiveuq 3, 4, 5   # encoding: [0x0b,0x2a,0x64,0x10]
+vdiveuq 3, 4, 5
+# CHECK-BE: vcmpequq 4, 5, 6  # encoding: [0x10,0x85,0x31,0xc7]
+# CHECK-LE: vcmpequq 4, 5, 6  # encoding: [0xc7,0x31,0x85,0x10]
+vcmpequq 4, 5, 6
+# CHECK-BE: vcmpequq. 4, 5, 6 # encoding: [0x10,0x85,0x35,0xc7]
+# CHECK-LE: vcmpequq. 4, 5, 6 # encoding: [0xc7,0x35,0x85,0x10]
+vcmpequq. 4, 5, 6
+# CHECK-BE: vcmpgtsq 4, 5, 6  # encoding: [0x10,0x85,0x33,0x87]
+# CHECK-LE: vcmpgtsq 4, 5, 6  # encoding: [0x87,0x33,0x85,0x10]
+vcmpgtsq 4, 5, 6
+# CHECK-BE: vcmpgtsq. 4, 5, 6 # encoding: [0x10,0x85,0x37,0x87]
+# CHECK-LE: vcmpgtsq. 4, 5, 6 # encoding: [0x87,0x37,0x85,0x10]
+vcmpgtsq. 4, 5, 6
+# CHECK-BE: vcmpgtuq 4, 5, 6  # encoding: [0x10,0x85,0x32,0x87]
+# CHECK-LE: vcmpgtuq 4, 5, 6  # encoding: [0x87,0x32,0x85,0x10]
+vcmpgtuq 4, 5, 6
+# CHECK-BE: vcmpgtuq. 4, 5, 6 # encoding: [0x10,0x85,0x36,0x87]
+# CHECK-LE: vcmpgtuq. 4, 5, 6 # encoding: [0x87,0x36,0x85,0x10]
+vcmpgtuq. 4, 5, 6
+# CHECK-BE: vmoduq 3, 4, 5# encoding: [0x10,0x64,0x2e,0x0b]
+# CHECK-LE: vmoduq 3, 4, 5# encoding: [0x0b,0x2e,0x64,0x10]
+vmoduq 3, 4, 5
+# CHECK-BE: vextsd2q 20, 25   # encoding: [0x12,0x9b,0xce,0x02]
+# CHECK-LE: vextsd2q 20, 25   # encoding: [0x02,0xce,0x9b,0x12]
+vextsd2q 20, 25
+# CHECK-BE: vrlq 4, 5, 6  # encoding: [0x10,0x85,0x30,0x05]
+# CHECK-LE: vrlq 4, 5, 6  # encoding: [0x05,0x30,0x85,0x10]
+vrlq 4, 5, 6
+# CHECK-BE: vrlqnm 4, 5, 6# encoding: [0x10,0x85,0x31,0x45]
+# CHECK-LE: vrlqnm 4, 5, 6# encoding: [0x45,0x31,0x85,0x10]
+vrlqnm 4, 5, 6
+# CHECK-BE: vrlqmi 4, 5, 6# encoding: [0x10,0x85,0x30,0x45]
+# CHECK-LE: vrlqmi 4, 5, 6# e

[PATCH] D83516: [PowerPC][Power10] Vector shift Instruction definitions and MC Tests

2020-07-10 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277127.
Conanap added a comment.

Added a new line to the end of ppc64-encoding-ISA31.txt


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83516/new/

https://reviews.llvm.org/D83516

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -405,3 +405,84 @@
 # CHECK-BE: vinsdrx 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xcf]
 # CHECK-LE: vinsdrx 1, 2, 3   # encoding: [0xcf,0x1b,0x22,0x10]
 vinsdrx 1, 2, 3
+# CHECK-BE: vmulesd 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xc8]
+# CHECK-LE: vmulesd 1, 2, 3   # encoding: [0xc8,0x1b,0x22,0x10]
+vmulesd 1, 2, 3
+# CHECK-BE: vmulosd 1, 2, 3   # encoding: [0x10,0x22,0x19,0xc8]
+# CHECK-LE: vmulosd 1, 2, 3   # encoding: [0xc8,0x19,0x22,0x10]
+vmulosd 1, 2, 3
+# CHECK-BE: vmuleud 1, 2, 3   # encoding: [0x10,0x22,0x1a,0xc8]
+# CHECK-LE: vmuleud 1, 2, 3   # encoding: [0xc8,0x1a,0x22,0x10]
+vmuleud 1, 2, 3
+# CHECK-BE: vmuloud 1, 2, 3   # encoding: [0x10,0x22,0x18,0xc8]
+# CHECK-LE: vmuloud 1, 2, 3   # encoding: [0xc8,0x18,0x22,0x10]
+vmuloud 1, 2, 3
+# CHECK-BE: vmsumcud 1, 2, 3, 4   # encoding: [0x10,0x22,0x19,0x17]
+# CHECK-LE: vmsumcud 1, 2, 3, 4   # encoding: [0x17,0x19,0x22,0x10]
+vmsumcud 1, 2, 3, 4
+# CHECK-BE: vdivsq 3, 4, 5# encoding: [0x10,0x64,0x29,0x0b]
+# CHECK-LE: vdivsq 3, 4, 5# encoding: [0x0b,0x29,0x64,0x10]
+vdivsq 3, 4, 5
+# CHECK-BE: vdivuq 3, 4, 5# encoding: [0x10,0x64,0x28,0x0b]
+# CHECK-LE: vdivuq 3, 4, 5# encoding: [0x0b,0x28,0x64,0x10]
+vdivuq 3, 4, 5
+# CHECK-BE: vdivesq 3, 4, 5   # encoding: [0x10,0x64,0x2b,0x0b]
+# CHECK-LE: vdivesq 3, 4, 5   # encoding: [0x0b,0x2b,0x64,0x10]
+vdivesq 3, 4, 5
+# CHECK-BE: vdiveuq 3, 4, 5   # encoding: [0x10,0x64,0x2a,0x0b]
+# CHECK-LE: vdiveuq 3, 4, 5   # encoding: [0x0b,0x2a,0x64,0x10]
+vdiveuq 3, 4, 5
+# CHECK-BE: vcmpequq 4, 5, 6  # encoding: [0x10,0x85,0x31,0xc7]
+# CHECK-LE: vcmpequq 4, 5, 6  # encoding: [0xc7,0x31,0x85,0x10]
+vcmpequq 4, 5, 6
+# CHECK-BE: vcmpequq. 4, 5, 6 # encoding: [0x10,0x85,0x35,0xc7]
+# CHECK-LE: vcmpequq. 4, 5, 6 # encoding: [0xc7,0x35,0x85,0x10]
+vcmpequq. 4, 5, 6
+# CHECK-BE: vcmpgtsq 4, 5, 6  # encoding: [0x10,0x85,0x33,0x87]
+# CHECK-LE: vcmpgtsq 4, 5, 6  # encoding: [0x87,0x33,0x85,0x10]
+vcmpgtsq 4, 5, 6
+# CHECK-BE: vcmpgtsq. 4, 5, 6 # encoding: [0x10,0x85,0x37,0x87]
+# CHECK-LE: vcmpgtsq. 4, 5, 6 # encoding: [0x87,0x37,0x85,0x10]
+vcmpgtsq. 4, 5, 6
+# CHECK-BE: vcmpgtuq 4, 5, 6  # encoding: [0x10,0x85,0x32,0x87]
+# CHECK-LE: vcmpgtuq 4, 5, 6  # encoding: [0x87,0x32,0x85,0x10]
+vcmpgtuq 4, 5, 6
+# CHECK-BE: vcmpgtuq. 4, 5, 6 # encoding: [0x10,0x85,0x36,0x87]
+# CHECK-LE: vcmpgtuq. 4, 5, 6 # encoding: [0x87,0x36,0x85,0x10]
+vcmpgtuq. 4, 5, 6
+# CHECK-BE: vmoduq 3, 4, 5# encoding: [0x10,0x64,0x2e,0x0b]
+# CHECK-LE: vmoduq 3, 4, 5# encoding: [0x0b,0x2e,0x64,0x10]
+vmoduq 3, 4, 5
+# CHECK-BE: vextsd2q 20, 25   # encoding: [0x12,0x9b,0xce,0x02]
+# CHECK-LE: vextsd2q 20, 25   # encoding: [0x02,0xce,0x9b,0x12]
+vextsd2q 20, 25
+# CHECK-BE: vrlq 4, 5, 6  # encoding: [0x10,0x85,0x30,0x05]
+# CHECK-LE: vrlq 4, 5, 6  # encoding: [0x05,0x30,0x85,0x10]
+vrlq 4, 5, 6
+# CHECK-BE: vrlqnm 4, 5, 6# encoding: [0x10,0x85,0x31,0x45]
+# CHECK-LE: vrlqnm 4, 5, 6# encoding: [0x45,0x31,0x85,0x10]
+vrlqnm 4, 5, 6
+# CHECK-BE: vrlqmi 4, 5, 6# encoding: [0x10,0x85,0x30,0x45]
+# CHECK-LE: vrlqmi 4, 5, 6# encoding: [0x45,0x30,0x85,0x10]
+vrlqmi 4, 5, 6
+# CHECK-BE: vslq 4, 5, 6  # encoding: [0x10,0x85,0x31,0x05]
+# CHECK-LE: vslq 4, 5, 6  # encoding: [0x0

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-13 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277415.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Removed unecessary comments and unintended changes.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -429,3 +429,15 @@
 # CHECK-BE: stxvrdx 35, 3, 1  # encoding: [0x7c,0x63,0x09,0xdb]
 # CHECK-LE: stxvrdx 35, 3, 1  # encoding: [0xdb,0x09,0x63,0x7c]
 stxvrdx 35, 3, 1
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -302,3 +302,15 @@
 
 # CHECK: stxvrdx 35, 3, 1
 0x7c 0x63 0x09 0xdb
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+   [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
  InstrItinClass itin> : Instruction {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -493,6 +493,12 @@
 /// an xxswapd.
 LXVD2X,
 
+/// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+/// This node represents v1i128 BUILD_VECTOR of a zero extending load
+/// instruction from  to i128.
+/// Allows utilization of the Load VSX Vector Rightmost Instructions.
+LXVRZX,
+
 /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
 /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
 /// the vector type to load vector in big-endian element order.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1572,6 +1572,7 @@
   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:  return "PPCISD::FNMSUB";
+  case PPCISD::LXVRZX:  return "PPCISD::LXVRZX";
   }
   return nullptr;
 }
@@ -14125,6 +14126,44 @@
   return SDValue();
 }
 
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+
+  // This combine is only eligible for a BUILD_VECTOR of v1i128.
+  // Other return types are not valid for the LXVRZX replacement.
+  if (N->getValueType(0) != MVT::v1i128)
+return SDValue();
+
+  SDValue Operand = N->getOperand(0);
+  // 

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-13 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277418.
Conanap added a comment.

Removed duplicate test code found in the instructions and MC Test 
implementation of VSX Vector store and load with sign extend or zero extend; 
re-added a test file that was omitted in the last diff update.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt

Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -302,3 +302,15 @@
 
 # CHECK: stxvrdx 35, 3, 1
 0x7c 0x63 0x09 0xdb
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/ISA31-vsx-builtins.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+   [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
  InstrItinClass itin> : Instruction {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.

[PATCH] D83516: [PowerPC][Power10] 128-bit Binary Integer Operation instruction definitions and MC Tests

2020-07-13 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277442.
Conanap marked an inline comment as done.
Conanap added a comment.

Some formatting changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83516/new/

https://reviews.llvm.org/D83516

Files:
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -429,3 +429,84 @@
 # CHECK-BE: stxvrdx 35, 3, 1  # encoding: [0x7c,0x63,0x09,0xdb]
 # CHECK-LE: stxvrdx 35, 3, 1  # encoding: [0xdb,0x09,0x63,0x7c]
 stxvrdx 35, 3, 1
+# CHECK-BE: vmulesd 1, 2, 3   # encoding: [0x10,0x22,0x1b,0xc8]
+# CHECK-LE: vmulesd 1, 2, 3   # encoding: [0xc8,0x1b,0x22,0x10]
+vmulesd 1, 2, 3
+# CHECK-BE: vmulosd 1, 2, 3   # encoding: [0x10,0x22,0x19,0xc8]
+# CHECK-LE: vmulosd 1, 2, 3   # encoding: [0xc8,0x19,0x22,0x10]
+vmulosd 1, 2, 3
+# CHECK-BE: vmuleud 1, 2, 3   # encoding: [0x10,0x22,0x1a,0xc8]
+# CHECK-LE: vmuleud 1, 2, 3   # encoding: [0xc8,0x1a,0x22,0x10]
+vmuleud 1, 2, 3
+# CHECK-BE: vmuloud 1, 2, 3   # encoding: [0x10,0x22,0x18,0xc8]
+# CHECK-LE: vmuloud 1, 2, 3   # encoding: [0xc8,0x18,0x22,0x10]
+vmuloud 1, 2, 3
+# CHECK-BE: vmsumcud 1, 2, 3, 4   # encoding: [0x10,0x22,0x19,0x17]
+# CHECK-LE: vmsumcud 1, 2, 3, 4   # encoding: [0x17,0x19,0x22,0x10]
+vmsumcud 1, 2, 3, 4
+# CHECK-BE: vdivsq 3, 4, 5# encoding: [0x10,0x64,0x29,0x0b]
+# CHECK-LE: vdivsq 3, 4, 5# encoding: [0x0b,0x29,0x64,0x10]
+vdivsq 3, 4, 5
+# CHECK-BE: vdivuq 3, 4, 5# encoding: [0x10,0x64,0x28,0x0b]
+# CHECK-LE: vdivuq 3, 4, 5# encoding: [0x0b,0x28,0x64,0x10]
+vdivuq 3, 4, 5
+# CHECK-BE: vdivesq 3, 4, 5   # encoding: [0x10,0x64,0x2b,0x0b]
+# CHECK-LE: vdivesq 3, 4, 5   # encoding: [0x0b,0x2b,0x64,0x10]
+vdivesq 3, 4, 5
+# CHECK-BE: vdiveuq 3, 4, 5   # encoding: [0x10,0x64,0x2a,0x0b]
+# CHECK-LE: vdiveuq 3, 4, 5   # encoding: [0x0b,0x2a,0x64,0x10]
+vdiveuq 3, 4, 5
+# CHECK-BE: vcmpequq 4, 5, 6  # encoding: [0x10,0x85,0x31,0xc7]
+# CHECK-LE: vcmpequq 4, 5, 6  # encoding: [0xc7,0x31,0x85,0x10]
+vcmpequq 4, 5, 6
+# CHECK-BE: vcmpequq. 4, 5, 6 # encoding: [0x10,0x85,0x35,0xc7]
+# CHECK-LE: vcmpequq. 4, 5, 6 # encoding: [0xc7,0x35,0x85,0x10]
+vcmpequq. 4, 5, 6
+# CHECK-BE: vcmpgtsq 4, 5, 6  # encoding: [0x10,0x85,0x33,0x87]
+# CHECK-LE: vcmpgtsq 4, 5, 6  # encoding: [0x87,0x33,0x85,0x10]
+vcmpgtsq 4, 5, 6
+# CHECK-BE: vcmpgtsq. 4, 5, 6 # encoding: [0x10,0x85,0x37,0x87]
+# CHECK-LE: vcmpgtsq. 4, 5, 6 # encoding: [0x87,0x37,0x85,0x10]
+vcmpgtsq. 4, 5, 6
+# CHECK-BE: vcmpgtuq 4, 5, 6  # encoding: [0x10,0x85,0x32,0x87]
+# CHECK-LE: vcmpgtuq 4, 5, 6  # encoding: [0x87,0x32,0x85,0x10]
+vcmpgtuq 4, 5, 6
+# CHECK-BE: vcmpgtuq. 4, 5, 6 # encoding: [0x10,0x85,0x36,0x87]
+# CHECK-LE: vcmpgtuq. 4, 5, 6 # encoding: [0x87,0x36,0x85,0x10]
+vcmpgtuq. 4, 5, 6
+# CHECK-BE: vmoduq 3, 4, 5# encoding: [0x10,0x64,0x2e,0x0b]
+# CHECK-LE: vmoduq 3, 4, 5# encoding: [0x0b,0x2e,0x64,0x10]
+vmoduq 3, 4, 5
+# CHECK-BE: vextsd2q 20, 25   # encoding: [0x12,0x9b,0xce,0x02]
+# CHECK-LE: vextsd2q 20, 25   # encoding: [0x02,0xce,0x9b,0x12]
+vextsd2q 20, 25
+# CHECK-BE: vrlq 4, 5, 6  # encoding: [0x10,0x85,0x30,0x05]
+# CHECK-LE: vrlq 4, 5, 6  # encoding: [0x05,0x30,0x85,0x10]
+vrlq 4, 5, 6
+# CHECK-BE: vrlqnm 4, 5, 6# encoding: [0x10,0x85,0x31,0x45]
+# CHECK-LE: vrlqnm 4, 5, 6# encoding: [0x45,0x31,0x85,0x10]
+vrlqnm 4, 5, 6
+# CHECK-BE: vrlqmi 4, 5, 6# encoding: [0x10,0x85,0x30,0x45]
+# CHECK-LE: vrlqmi 4, 5, 6# encoding: [0x45,0x30,0x85,0x10]
+vrlqmi 4, 5, 6
+# CHECK-BE: vslq 4, 5, 6  # encoding: [0x10,0x85,0x31,0x05]
+# CHECK-LE: vslq 4, 5, 6  # enc

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-14 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277896.
Conanap marked 3 inline comments as done.
Conanap added a comment.

Added tests, added extra comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+@vui128a = dso_local global <1 x i128> zeroinitializer, align 16
+@vui128b = dso_local global <1 x i128> zeroinitializer, align 16
+
+define dso_local <1 x i128> @test_vec_slq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_slq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128>, <1 x i128>) #1
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128>, <1 x i128>) #1
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128>, <1 x i128>) #1
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1027,15 +1027,16 @@
 (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
 "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
 RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-  def VSLQ   : VX1_VT5_VA5_VB5<261, "vslq", []>;
-  def VSRAQ  : VX1_VT5_VA5_VB5<773, "vsraq", []>;
-  def VSRQ   : VX1_VT5_VA5_VB5<517, "vsrq", []>;
+  def VSLQ  : VX1_Int_Ty< 261, "vslq", int_ppc_altivec_vslq, v1i128>;
+  def VSRAQ : VX1_Int_Ty< 773, "vsraq", int_ppc_altivec_vsraq, v1i128>;
+  def VSRQ  : VX1_Int_Ty< 517, "vsrq" , int_ppc_altivec_vsrq, v1i128>;
   def VRLQ   : VX1_VT5_VA5_VB5<5, "vrlq", []>;
 
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP  : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP  : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+
 }
 
 // Anonymous Patterns //
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -797,6 +797,7 @@
 def int_ppc_altivec_vsrv  : PowerPC_Vec_BBB_Intrinsic<"vsrv">;
 def int_ppc_altivec_vslh  : PowerPC_Vec_HHH_Intrinsic<"vslh">;
 def int_ppc_altivec_vslw  : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+def int_ppc_altivec_vslq  : PowerPC_Vec_QQQ_Intrinsic<"vslq">;
 
 // Right Shifts.
 def int_ppc_altivec_vsr   : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@@ -805,9 +806,11 @@
 def int_ppc_altivec_vsrb  : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
 def int_ppc_altivec_vsrh  : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
 def int_ppc_altivec_vsrw  : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrq  : PowerPC_Vec_QQQ_Intrinsic<"vsrq">;
 def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
 def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
 def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+def int_ppc_altivec_vsraq : PowerPC_Vec_QQQ_Intrinsic<"vsraq">;
 
 // Rotates.
 def int_ppc_altivec_vrlb  : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
Index: clang/test/CodeGen/builtins-pp

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-14 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

Also removed instr def as it will be part of D83516 
.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-14 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 277952.
Conanap added a comment.

Restored accidentally deleted pattern, removed duplicate tests, moved new tests 
to another pre-existing file instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,4 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
 ; RUN:   FileCheck %s
@@ -33,3 +36,63 @@
   %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i1 0)
   ret i32 %0
 }
+
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+; (lxvr[b|h|w|d]x) instructions in Power10.
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,16 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+   [SDNPHasChain, SDNPMayLoad]>;
+
+
 // Top-level class for prefixed instructions.
 class PI pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
  InstrItinClass itin> : Instruction {
@@ -996,6 +1006,15 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+  def

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-07-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 278235.
Conanap marked 7 inline comments as done.
Conanap added a comment.

Formatting fixes, fixed test case return type, updated builtins' signatures to 
correct signatures.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+define dso_local <1 x i128> @test_vec_slq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_slq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) #0 {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %slq = call <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128> %a, <1 x i128> %b)
+  ret <1 x i128> %slq
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vslq(<1 x i128>, <1 x i128>) #1
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsrq(<1 x i128>, <1 x i128>) #1
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsraq(<1 x i128>, <1 x i128>) #1
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1027,9 +1027,9 @@
 (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
 "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
 RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-  def VSLQ   : VX1_VT5_VA5_VB5<261, "vslq", []>;
-  def VSRAQ  : VX1_VT5_VA5_VB5<773, "vsraq", []>;
-  def VSRQ   : VX1_VT5_VA5_VB5<517, "vsrq", []>;
+  def VSLQ  : VX1_Int_Ty<261, "vslq", int_ppc_altivec_vslq, v1i128>;
+  def VSRAQ : VX1_Int_Ty<773, "vsraq", int_ppc_altivec_vsraq, v1i128>;
+  def VSRQ  : VX1_Int_Ty<517, "vsrq", int_ppc_altivec_vsrq, v1i128>;
   def VRLQ   : VX1_VT5_VA5_VB5<5, "vrlq", []>;
 
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -797,6 +797,7 @@
 def int_ppc_altivec_vsrv  : PowerPC_Vec_BBB_Intrinsic<"vsrv">;
 def int_ppc_altivec_vslh  : PowerPC_Vec_HHH_Intrinsic<"vslh">;
 def int_ppc_altivec_vslw  : PowerPC_Vec_WWW_Intrinsic<"vslw">;
+def int_ppc_altivec_vslq  : PowerPC_Vec_QQQ_Intrinsic<"vslq">;
 
 // Right Shifts.
 def int_ppc_altivec_vsr   : PowerPC_Vec_WWW_Intrinsic<"vsr">;
@@ -805,9 +806,11 @@
 def int_ppc_altivec_vsrb  : PowerPC_Vec_BBB_Intrinsic<"vsrb">;
 def int_ppc_altivec_vsrh  : PowerPC_Vec_HHH_Intrinsic<"vsrh">;
 def int_ppc_altivec_vsrw  : PowerPC_Vec_WWW_Intrinsic<"vsrw">;
+def int_ppc_altivec_vsrq  : PowerPC_Vec_QQQ_Intrinsic<"vsrq">;
 def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">;
 def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">;
 def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
+def int_ppc_altivec_vsraq : PowerPC_Vec_QQQ_Intrinsic<"vsraq">;
 
 // Rotates.
 def int_ppc_altivec_vrlb  : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -21,6 +21,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vi128a

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-06-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: power-llvm-team, PowerPC, nemanjai, saghir, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.

Implemented following functions for Load VSX Vector Sign extend nad zero extend 
instructions:

  vector signed __int128 vec_xl_sext (signed long long, signed char *);
  vector signed __int128 vec_xl_sext (signed long long, signed short *);
  vector signed __int128 vec_xl_sext (signed long long, signed int *);
  vector signed __int128 vec_xl_sext (signed long long, signed long long *);
  
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned char *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned short *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned int *);
  vector unsigned __int128 vec_xl_zext (signed long long, unsigned long long *);


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; ZEXT TEST CASES
+
+; i8
+; CHECK: lxvrbx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; i16
+; CHECK: lxvrhx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; i32
+; CHECK: lxvrwx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr i

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-06-25 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 273393.
Conanap added a comment.

Addressed Amy's comments regarding documentation of the changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+
+; (lxvr[b|h|w|d]) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; CHECK: lxvrbx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrhx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrwx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrdx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-06-25 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 273421.
Conanap marked 9 inline comments as done.
Conanap added a comment.

Fixed return signature for the open coded functions in altivec.h


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+
+; (lxvr[b|h|w|d]) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; CHECK: lxvrbx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrhx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrwx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; CHECK: lxvrdx
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+;

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-06-29 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 274097.
Conanap marked an inline comment as done.
Conanap added a comment.

Reordered some declarations in test cases and removed unecessary extra CHECKs.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,15 @@
 # CHECK-BE: vclrrb 1, 4, 3# encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3# encoding: [0xcd,0x19,0x24,0x10]
 vclrrb 1, 4, 3
+# CHECK-BE: lxvrbx 32, 1, 2   # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2   # encoding: [0x1b,0x10,0x01,0x7c]
+lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2   # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2   # encoding: [0x5b,0x10,0x21,0x7c]
+lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2   # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2   # encoding: [0xdb,0x10,0x41,0x7c]
+lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2   # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2   # encoding: [0x9b,0x10,0x61,0x7c]
+lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; These test cases tests that zero extending loads utilize the Load VSX Vector Rightmost
+
+; (lxvr[b|h|w|d]x) instructions in Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+; Function Attrs: norecurse nounwind readonly
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) local_unnamed_addr #0 {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:b

[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-07-30 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 281974.
Conanap marked 2 inline comments as done.
Conanap removed a reviewer: power-llvm-team.
Conanap added a comment.
Herald added a subscriber: kbarton.

Some updates on formatting and updated to match an updated test file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-O0
+; RUN: FileCheck %s --check-prefix=CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -15,13 +15,21 @@
 declare i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8>, i32)
 
 define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:xvtlsbb cr0, v2
-; CHECK-NEXT:mfocrf r3, 128
-; CHECK-NEXT:srwi r3, r3, 31
-; CHECK-NEXT:extsw r3, r3
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xvtlsbb cr0, v2
+; CHECK-LE-NEXT:mfocrf r3, 128
+; CHECK-LE-NEXT:srwi r3, r3, 31
+; CHECK-LE-NEXT:extsw r3, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xvtlsbb cr0, v2
+; CHECK-BE-NEXT:mfocrf r3, 128
+; CHECK-BE-NEXT:srwi r3, r3, 31
+; CHECK-BE-NEXT:extsw r3, r3
+; CHECK-BE-NEXT:blr
 ;
 ; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
 ; CHECK-O0:   # %bb.0: # %entry
@@ -36,13 +44,21 @@
 }
 
 define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:xvtlsbb cr0, v2
-; CHECK-NEXT:mfocrf r3, 128
-; CHECK-NEXT:rlwinm r3, r3, 3, 31, 31
-; CHECK-NEXT:extsw r3, r3
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:xvtlsbb cr0, v2
+; CHECK-LE-NEXT:mfocrf r3, 128
+; CHECK-LE-NEXT:rlwinm r3, r3, 3, 31, 31
+; CHECK-LE-NEXT:extsw r3, r3
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:xvtlsbb cr0, v2
+; CHECK-BE-NEXT:mfocrf r3, 128
+; CHECK-BE-NEXT:rlwinm r3, r3, 3, 31, 31
+; CHECK-BE-NEXT:extsw r3, r3
+; CHECK-BE-NEXT:blr
 ;
 ; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
 ; CHECK-O0:   # %bb.0: # %entry
@@ -57,10 +73,15 @@
 }
 
 define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sc:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:stxvrbx v2, r6, r5
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: vec_xst_trunc_sc:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sc:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:stxvrbx v2, r6, r5
+; CHECK-BE-NEXT:blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sc:
 ; CHECK-O0:   # %bb.0: # %entry
@@ -79,10 +100,15 @@
 }
 
 define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_uc:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:stxvrbx v2, r6, r5
-; CHECK-NEXT:blr
+; CHECK-LE-LABEL: vec_xst_trunc_uc:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:stxvrbx v2, r6, r5
+; CHECK-LE-NEXT:blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_uc:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:stxvrbx v2, r6, r5
+; CHECK-BE-NEXT:blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_uc:
 ; CHECK-O0:   # %bb.0: # %entry
@@ -101,11 +127,17 @@
 }
 
 define void @vec_xst_trunc_ss(<1 x i128> %__vec

[PATCH] D84962: [PowerPC] Correct cpsgn's behaviour on PowerPC to match that of the ABI

2020-07-30 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added subscribers: cfe-commits, shchenz.
Conanap requested review of this revision.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84962

Files:
  clang/lib/CodeGen/CGBuiltin.cpp


Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14295,7 +14295,7 @@
 Value *Y = EmitScalarExpr(E->getArg(1));
 ID = Intrinsic::copysign;
 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
-return Builder.CreateCall(F, {X, Y});
+return Builder.CreateCall(F, {Y, X});
   }
   // Rounding/truncation
   case PPC::BI__builtin_vsx_xvrspip:


Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14295,7 +14295,7 @@
 Value *Y = EmitScalarExpr(E->getArg(1));
 ID = Intrinsic::copysign;
 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
-return Builder.CreateCall(F, {X, Y});
+return Builder.CreateCall(F, {Y, X});
   }
   // Rounding/truncation
   case PPC::BI__builtin_vsx_xvrspip:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-08-04 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 283061.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Clang formatted relevant lines, combined LE and BE tests as they produced the 
same ASM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN: FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN: FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-O0
+; RUN: FileCheck %s --check-prefix=CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -239,3 +239,181 @@
   store i64 %conv, i64* %add.ptr, align 8
   ret void
 }
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:lxvrbx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_short:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 1
+; CHECK-O0-NEXT:lxvrhx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_word:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 2
+; CHECK-O0-NEXT:lxvrwx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_dw:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 3
+; CHECK-O0-NEXT:lxvrdx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
+; CHECK-LABEL: vec_xl_sext_b:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lbzx r3, r4, r3
+; CHECK-NEXT:extsb r3, r3
+; CHECK-NEXT:sradi r4, r3, 63
+; CHECK-NEXT:mtvsrdd v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_b:
+; CHECK-O0: 

[PATCH] D85453: [PowerPC] Implement __int128 vector divide operations

2020-08-06 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added a subscriber: kbarton.
Conanap requested review of this revision.
Herald added a subscriber: wuzish.

This patch implements __int128 vector divide operations for ISA3.1.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D85453

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-divide.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
===
--- llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -49,3 +49,21 @@
   %div = sdiv <4 x i32> %a, %b
   ret <4 x i32> %div
 }
+
+define <1 x i128> @test_vdivsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivsq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vdivsq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = sdiv <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vdivuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivuq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vdivuq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = udiv <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1124,9 +1124,11 @@
(ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
"vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral, []>;
   def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vdivsq $vD, $vA, $vB", IIC_VecGeneral,
+[(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vdivuq $vD, $vA, $vB", IIC_VecGeneral,
+[(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
  "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -836,6 +836,8 @@
   setOperationAction(ISD::SREM, MVT::v2i64, Legal);
   setOperationAction(ISD::UREM, MVT::v4i32, Legal);
   setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+  setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
+  setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
 }
 
 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vsi128a, vsi128b;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -61,6 +62,18 @@
   return vec_div(vulla, vullb);
 }
 
+vector unsigned __int128 test_vec_div_u128(void) {
+  // CHECK: udiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_div_s128(void) {
+  // CHECK: sdiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vsi128a, vsi128b);
+}
+
 vector signed int test_vec_mod_si(void) {
   // CHECK: srem <4 x i32>
   // CHECK-NEXT: ret <4 x i32>
Index: clang/lib/Headers/altivec.h
===
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3288,6 +3288,18 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a / __b;
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_div(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a / __b;
+}
+#endif __POWER10_VECTOR__
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-08-07 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 283980.
Conanap marked 8 inline comments as done.
Conanap added a comment.

Added shl tests, formatting fixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These tests ensure that vector shift quadword builtins are correctly
+; exploited and selected for during codeGen.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shl = shl <1 x i128> %a, %rem
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = lshr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = ashr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shl = shl <1 x i128> %a, %b
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shr = lshr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shr = ashr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1176,6 +1176,19 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
 }
 
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1110,6 +1110,9 @@
 }
   }
 
+  if (Subtarget.isISA3_1())
+setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+
   if (Subtarget.has64BitSupport())
 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vi128a;
 vector unsigned __int128 vui128

[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-08-07 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/Headers/altivec.h:17243
+  // return (vector signed __int128)vec_sr((vector unsigned __int128)__a, __b);
+  return (vector signed __int128)(
+((vector unsigned __int128) __a) >>

amyk wrote:
> Could you please fix the indentation of the returns to make them all 
> consistent?
I ran clang-format this time, lmk if we should change this as it doesn't look 
like clang-format was too consistent with itself


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83338: [PowerPC][Power10] Implemented Vector Shift Builtins

2020-08-12 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3136cbe29e74: [PowerPC]  Implement Vector Shift Builtins 
(authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D83338?vs=283980&id=285208#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83338/new/

https://reviews.llvm.org/D83338

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-shift.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-shift.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These test cases demonstrate that the vector shift quadword instructions
+; introduced within Power10 are correctly exploited.
+
+define dso_local <1 x i128> @test_vec_vslq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shl = shl <1 x i128> %a, %rem
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = lshr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %rem = urem <1 x i128> %b, 
+  %shr = ashr <1 x i128> %a, %rem
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vslq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vslq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vslq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shl = shl <1 x i128> %a, %b
+  ret <1 x i128> %shl
+}
+
+define dso_local <1 x i128> @test_vec_vsrq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsrq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsrq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shr = lshr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}
+
+define dso_local <1 x i128> @test_vec_vsraq2(<1 x i128> %a, <1 x i128> %b) {
+; CHECK-LABEL: test_vec_vsraq2:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vsraq v2, v2, v3
+; CHECK-NEXT:blr
+entry:
+  %shr = ashr <1 x i128> %a, %b
+  ret <1 x i128> %shr
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1288,6 +1288,18 @@
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
 (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+  def : Pat<(v1i128 (shl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCshl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSLQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (srl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsrl v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (sra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
+  def : Pat<(v1i128 (PPCsra v1i128:$VRA, v1i128:$VRB)),
+(v1i128 (VSRAQ v1i128:$VRA, v1i128:$VRB))>;
 }
 
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1128,6 +1128,9 @@
   if (Subtarget.has64BitSupport())
 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 
+  if (Subtarget.isISA3_1())
+setOperationAction(ISD::SRA, MVT::v1i128, Legal);
+
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
 
   if (!isPPC64) {
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10v

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359066.
Conanap added a comment.

Added more tests, corrected sema checking and intrinsic flag


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_mftbu:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:mftbu 3
+; CHECK-AIX-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_mfmsr:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:mfmsr 4
+; CHECK-AIX-NEXT:li 3, 0

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359097.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Changed xoaddr, removed extws, changed check prefix


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BI

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359104.
Conanap added a comment.

Changed more xoaddr to ForceXForm


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mfmsr 4
+; CHECK-32BIT-NEXT:li 3, 0
+; CHECK-

[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Herald added subscribers: steven.zhang, shchenz, kbarton, hiraditya, nemanjai.
Conanap requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Implemented builtins for mtmsr, mfspr, mtspr on PowerPC;
the patch is intended for XL Compatibility.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106130

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-64bit-only.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Had to manually modify the last test case (mtmsr) to allow the
+; NOTE: common check of mtmsr instead of 4 different check prefixes
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 
 declare i32 @llvm.ppc.mftbu()
 declare i32 @llvm.ppc.mfmsr()
+declare void @llvm.ppc.mtmsr(i32)
+
+@ula = external local_unnamed_addr global i64, align 8
 
 define dso_local zeroext i32 @test_mftbu() {
 ; CHECK-LABEL: test_mftbu:
@@ -18,10 +23,10 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mftbu:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mftbu 3
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mftbu()
   ret i32 %0
@@ -34,13 +39,31 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mfmsr:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mfmsr 4
-; CHECK-AIX-NEXT:li 3, 0
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mfmsr 4
+; CHECK-32BIT-NEXT:li 3, 0
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mfmsr()
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
+
+define dso_local void @test_mtmsr() {
+; CHECK-LABEL: test_mtmsr:
+; CHECK:mtmsr 3
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mtmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-32BIT-NEXT:lwz 3, 4(3)
+; CHECK-32BIT-NEXT:mtmsr 3, 0
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = load i64, i64* @ula, align 8
+  %conv = trunc i64 %0 to i32
+  call void @llvm.ppc.mtmsr(i32 %conv)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mfspr.i32(i32 immarg)
+declare void @llvm.ppc.mtspr.i32(i32 immarg, i32)
+
+@ula = external dso_local global i32, align 4
+
+define dso_local i32 @test_mfxer() {
+; CHECK-LABEL: test_mfxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 1)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mflr() {
+; CHECK-LABEL: test_mflr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 8
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 8)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfctr() {
+; CHECK-LABEL: test_mfctr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 9
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 9)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr() {
+; CHECK-LABEL: test_mfppr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 896
+; CHECK-NEXT: 

[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359236.
Conanap added a comment.

Added sema checking


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106130/new/

https://reviews.llvm.org/D106130

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-64bit-only.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Had to manually modify the last test case (mtmsr) to allow the
+; NOTE: common check of mtmsr instead of 4 different check prefixes
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 
 declare i32 @llvm.ppc.mftbu()
 declare i32 @llvm.ppc.mfmsr()
+declare void @llvm.ppc.mtmsr(i32)
+
+@ula = external local_unnamed_addr global i64, align 8
 
 define dso_local zeroext i32 @test_mftbu() {
 ; CHECK-LABEL: test_mftbu:
@@ -18,10 +23,10 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mftbu:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mftbu 3
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mftbu()
   ret i32 %0
@@ -34,13 +39,31 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mfmsr:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mfmsr 4
-; CHECK-AIX-NEXT:li 3, 0
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mfmsr 4
+; CHECK-32BIT-NEXT:li 3, 0
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mfmsr()
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
+
+define dso_local void @test_mtmsr() {
+; CHECK-LABEL: test_mtmsr:
+; CHECK:mtmsr 3
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mtmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-32BIT-NEXT:lwz 3, 4(3)
+; CHECK-32BIT-NEXT:mtmsr 3, 0
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = load i64, i64* @ula, align 8
+  %conv = trunc i64 %0 to i32
+  call void @llvm.ppc.mtmsr(i32 %conv)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mfspr.i32(i32 immarg)
+declare void @llvm.ppc.mtspr.i32(i32 immarg, i32)
+
+@ula = external dso_local global i32, align 4
+
+define dso_local i32 @test_mfxer() {
+; CHECK-LABEL: test_mfxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 1)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mflr() {
+; CHECK-LABEL: test_mflr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 8
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 8)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfctr() {
+; CHECK-LABEL: test_mfctr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 9
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 9)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr() {
+; CHECK-LABEL: test_mfppr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 896
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 896)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr32() {
+; CHECK-LABEL: test_mfppr32:
+

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359259.
Conanap added a comment.

Updated lharx and lbarx to inline asm implementation, implemented stfiw.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:  

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359322.
Conanap marked 5 inline comments as done.
Conanap added a comment.

Added non-vsx pattern for stfiw, extra testline for that pattern,
some nits


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %ent

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359380.
Conanap added a comment.

Updated a test case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-3

[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359400.
Conanap marked 4 inline comments as done.
Conanap added a comment.

Changed `i32` to `timm` for patterns, some nits


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106130/new/

https://reviews.llvm.org/D106130

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-64bit-only.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Had to manually modify the last test case (mtmsr) to allow the
+; NOTE: common check of mtmsr instead of 4 different check prefixes
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 
 declare i32 @llvm.ppc.mftbu()
 declare i32 @llvm.ppc.mfmsr()
+declare void @llvm.ppc.mtmsr(i32)
+
+@ula = external local_unnamed_addr global i64, align 8
 
 define dso_local zeroext i32 @test_mftbu() {
 ; CHECK-LABEL: test_mftbu:
@@ -18,10 +23,10 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mftbu:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mftbu 3
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mftbu()
   ret i32 %0
@@ -34,13 +39,31 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mfmsr:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mfmsr 4
-; CHECK-AIX-NEXT:li 3, 0
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mfmsr 4
+; CHECK-32BIT-NEXT:li 3, 0
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mfmsr()
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
+
+define dso_local void @test_mtmsr() {
+; CHECK-LABEL: test_mtmsr:
+; CHECK:mtmsr 3
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mtmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-32BIT-NEXT:lwz 3, 4(3)
+; CHECK-32BIT-NEXT:mtmsr 3, 0
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = load i64, i64* @ula, align 8
+  %conv = trunc i64 %0 to i32
+  call void @llvm.ppc.mtmsr(i32 %conv)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mfspr.i32(i32 immarg)
+declare void @llvm.ppc.mtspr.i32(i32 immarg, i32)
+
+@ula = external dso_local global i32, align 4
+
+define dso_local i32 @test_mfxer() {
+; CHECK-LABEL: test_mfxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 1)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mflr() {
+; CHECK-LABEL: test_mflr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 8
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 8)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfctr() {
+; CHECK-LABEL: test_mfctr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 9
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 9)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr() {
+; CHECK-LABEL: test_mfppr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 896
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 896)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr32

[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:15586
+  case PPC::BI__builtin_ppc_mfspr: {
+llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 
32
+  ? Int32Ty

nemanjai wrote:
> Is this the formatting that `clang-format` produces? Seems surprising it 
> would format it that way.
I ran `git clang-format HEAD^` and it didn't change it, so seems like it was 
happy with this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106130/new/

https://reviews.llvm.org/D106130

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359424.
Conanap added a comment.

added an error test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106130/new/

https://reviews.llvm.org/D106130

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-error.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-64bit-only.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -1,15 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Had to manually modify the last test case (mtmsr) to allow the
+; NOTE: common check of mtmsr instead of 4 different check prefixes
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 
 declare i32 @llvm.ppc.mftbu()
 declare i32 @llvm.ppc.mfmsr()
+declare void @llvm.ppc.mtmsr(i32)
+
+@ula = external local_unnamed_addr global i64, align 8
 
 define dso_local zeroext i32 @test_mftbu() {
 ; CHECK-LABEL: test_mftbu:
@@ -18,10 +23,10 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mftbu:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mftbu 3
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mftbu()
   ret i32 %0
@@ -34,13 +39,31 @@
 ; CHECK-NEXT:clrldi 3, 3, 32
 ; CHECK-NEXT:blr
 ;
-; CHECK-AIX-LABEL: test_mfmsr:
-; CHECK-AIX:   # %bb.0: # %entry
-; CHECK-AIX-NEXT:mfmsr 4
-; CHECK-AIX-NEXT:li 3, 0
-; CHECK-AIX-NEXT:blr
+; CHECK-32BIT-LABEL: test_mfmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mfmsr 4
+; CHECK-32BIT-NEXT:li 3, 0
+; CHECK-32BIT-NEXT:blr
 entry:
   %0 = tail call i32 @llvm.ppc.mfmsr()
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
+
+define dso_local void @test_mtmsr() {
+; CHECK-LABEL: test_mtmsr:
+; CHECK:mtmsr 3
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mtmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-32BIT-NEXT:lwz 3, 4(3)
+; CHECK-32BIT-NEXT:mtmsr 3, 0
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = load i64, i64* @ula, align 8
+  %conv = trunc i64 %0 to i32
+  call void @llvm.ppc.mtmsr(i32 %conv)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mfspr.i32(i32 immarg)
+declare void @llvm.ppc.mtspr.i32(i32 immarg, i32)
+
+@ula = external dso_local global i32, align 4
+
+define dso_local i32 @test_mfxer() {
+; CHECK-LABEL: test_mfxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 1)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mflr() {
+; CHECK-LABEL: test_mflr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 8
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 8)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfctr() {
+; CHECK-LABEL: test_mfctr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 9
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 9)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr() {
+; CHECK-LABEL: test_mfppr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 896
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 896)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr32() {
+; CHECK-LABEL

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359433.
Conanap marked an inline comment as done.
Conanap added a comment.

Updated test cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap marked 4 inline comments as done.
Conanap added a comment.

Addressed comments




Comment at: clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c:15
+  // CHECK-LABEL: @test_lwarx
   // CHECK: %0 = tail call i32 asm sideeffect "lwarx $0, ${1:y}", 
"=r,*Z,~{memory}"(i32* %a)
   return __lwarx(a);

lei wrote:
> where is the check for `CHECK-NON-PWR8-ERR:`?
`lwarx` and `stwcx` are both available before power8, so the check is not 
needed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359516.
Conanap marked an inline comment as done.
Conanap added a comment.

Moved pattern to a more appropriate place, updated test cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NE

[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

In D105926#2885291 , @teemperor wrote:

> Sorry for raising an unrelated topic here, but I can't reach @Conanap 
> directly via the mail from the git commits: @Conanap could you please create 
> the git branches for your patches in your own Github fork instead of the main 
> LLVM repo? LLVM's policy is to have working branches in everyone's private 
> fork (even though I don't think we explicitly tell people that when they get 
> commit access). I'll go ahead and delete your created branches end of next 
> week, but let me know if I should wait a bit longer with that. Thanks!

Ah apologies, I'll do that. Sorry about that! I'll keep the rest of the 
branches on my personal repo; please feel free to delete these by the end of 
next week.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359681.
Conanap added a comment.

Removed metadata, -O3 to generate the test case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

Files:
  clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll


Index: clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:stdcx. 4, 0, 3
+; CHECK-NEXT:mfocrf 5, 128
+; CHECK-NEXT:srwi 5, 5, 28
+; CHECK-NEXT:cmplwi 5, 0
+; CHECK-NEXT:beq 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %do.end
+; CHECK-NEXT:ld 3, -8(1)
+; CHECK-NEXT:li 4, 55
+; CHECK-NEXT:cmpldi 3, 0
+; CHECK-NEXT:li 3, 66
+; CHECK-NEXT:iseleq 3, 4, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: main:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:li 3, -1
+; CHECK-AIX-NEXT:li 4, 0
+; CHECK-AIX-NEXT:std 3, -8(1)
+; CHECK-AIX-NEXT:addi 3, 1, -8
+; CHECK-AIX-NEXT:.align 5
+; CHECK-AIX-NEXT:  L..BB0_1: # %do.body
+; CHECK-AIX-NEXT:#
+; CHECK-AIX-NEXT:#APP
+; CHECK-AIX-NEXT:ldarx 5, 0, 3
+; CHECK-AIX-NEXT:#NO_APP
+; CHECK-AIX-NEXT:stdcx. 4, 0, 3
+; CHECK-AIX-NEXT:mfocrf 5, 128
+; CHECK-AIX-NEXT:srwi 5, 5, 28
+; CHECK-AIX-NEXT:cmplwi 5, 0
+; CHECK-AIX-NEXT:beq 0, L..BB0_1
+; CHECK-AIX-NEXT:  # %bb.2: # %do.end
+; CHECK-AIX-NEXT:ld 3, -8(1)
+; CHECK-AIX-NEXT:li 4, 55
+; CHECK-AIX-NEXT:cmpldi 3, 0
+; CHECK-AIX-NEXT:li 3, 66
+; CHECK-AIX-NEXT:iseleq 3, 4, 3
+; CHECK-AIX-NEXT:blr
+entry:
+  %x64 = alloca i64, align 8
+  %0 = bitcast i64* %x64 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  store i64 -1, i64* %x64, align 8
+  br label %do.body
+
+do.body:  ; preds = %do.body, %entry
+  %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* 
nonnull %x64)
+  %2 = call i32 @llvm.ppc.stdcx(i8* nonnull %0, i64 0)
+  %tobool.not = icmp eq i32 %2, 0
+  br i1 %tobool.not, label %do.body, label %do.end
+
+do.end:   ; preds = %do.body
+  %3 = load i64, i64* %x64, align 8
+  %cmp = icmp eq i64 %3, 0
+  %. = select i1 %cmp, i32 55, i32 66
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret i32 %.
+}
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: nounwind writeonly
+declare i32 @llvm.ppc.stdcx(i8*, i64)
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)


Index: clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:stdcx. 4, 0, 3
+; CHECK-NEXT:mfocrf 5, 128
+; CHECK-NE

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359850.
Conanap added a comment.

Changed flags for intrinsic of dcbtt and dcbtstt


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHE

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 359945.
Conanap added a comment.

Fixed a typo


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = tail call i32 @llvm.ppc.mftbu()
+  ret i32 %0
+}
+
+define dso_local i64 @test_mfmsr() {
+; CHECK-LABEL: test_mfmsr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfmsr 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LA

[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1d3e77e7a842: [PowerPC] Extra test case for LDARX (authored 
by Conanap).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

Files:
  clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll


Index: clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:stdcx. 4, 0, 3
+; CHECK-NEXT:mfocrf 5, 128
+; CHECK-NEXT:srwi 5, 5, 28
+; CHECK-NEXT:cmplwi 5, 0
+; CHECK-NEXT:beq 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %do.end
+; CHECK-NEXT:ld 3, -8(1)
+; CHECK-NEXT:li 4, 55
+; CHECK-NEXT:cmpldi 3, 0
+; CHECK-NEXT:li 3, 66
+; CHECK-NEXT:iseleq 3, 4, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: main:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:li 3, -1
+; CHECK-AIX-NEXT:li 4, 0
+; CHECK-AIX-NEXT:std 3, -8(1)
+; CHECK-AIX-NEXT:addi 3, 1, -8
+; CHECK-AIX-NEXT:.align 5
+; CHECK-AIX-NEXT:  L..BB0_1: # %do.body
+; CHECK-AIX-NEXT:#
+; CHECK-AIX-NEXT:#APP
+; CHECK-AIX-NEXT:ldarx 5, 0, 3
+; CHECK-AIX-NEXT:#NO_APP
+; CHECK-AIX-NEXT:stdcx. 4, 0, 3
+; CHECK-AIX-NEXT:mfocrf 5, 128
+; CHECK-AIX-NEXT:srwi 5, 5, 28
+; CHECK-AIX-NEXT:cmplwi 5, 0
+; CHECK-AIX-NEXT:beq 0, L..BB0_1
+; CHECK-AIX-NEXT:  # %bb.2: # %do.end
+; CHECK-AIX-NEXT:ld 3, -8(1)
+; CHECK-AIX-NEXT:li 4, 55
+; CHECK-AIX-NEXT:cmpldi 3, 0
+; CHECK-AIX-NEXT:li 3, 66
+; CHECK-AIX-NEXT:iseleq 3, 4, 3
+; CHECK-AIX-NEXT:blr
+entry:
+  %x64 = alloca i64, align 8
+  %0 = bitcast i64* %x64 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  store i64 -1, i64* %x64, align 8
+  br label %do.body
+
+do.body:  ; preds = %do.body, %entry
+  %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* 
nonnull %x64)
+  %2 = call i32 @llvm.ppc.stdcx(i8* nonnull %0, i64 0)
+  %tobool.not = icmp eq i32 %2, 0
+  br i1 %tobool.not, label %do.body, label %do.end
+
+do.end:   ; preds = %do.body
+  %3 = load i64, i64* %x64, align 8
+  %cmp = icmp eq i64 %3, 0
+  %. = select i1 %cmp, i32 55, i32 66
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret i32 %.
+}
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: nounwind writeonly
+declare i32 @llvm.ppc.stdcx(i8*, i64)
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)


Index: clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3

[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-19 Thread Albion Fung via Phabricator via cfe-commits
Conanap reopened this revision.
Conanap added a comment.
This revision is now accepted and ready to land.

Had to revert this as I'm seeing failures on buildbots not owned by us. The 
error is:

  llc: error: : error: unable to get target for 
'powerpc64le-unknown-linux-gnu', see --version and --triple.

Note that when testing on local machines I did not encounter this error, 
neither did PowerPC buildbots, so I'll need to look into it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 360114.
Conanap added a comment.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Moved to llvm/test/CodeGen/PowerPC


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

Files:
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll


Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:stdcx. 4, 0, 3
+; CHECK-NEXT:mfocrf 5, 128
+; CHECK-NEXT:srwi 5, 5, 28
+; CHECK-NEXT:cmplwi 5, 0
+; CHECK-NEXT:beq 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %do.end
+; CHECK-NEXT:ld 3, -8(1)
+; CHECK-NEXT:li 4, 55
+; CHECK-NEXT:cmpldi 3, 0
+; CHECK-NEXT:li 3, 66
+; CHECK-NEXT:iseleq 3, 4, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: main:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:li 3, -1
+; CHECK-AIX-NEXT:li 4, 0
+; CHECK-AIX-NEXT:std 3, -8(1)
+; CHECK-AIX-NEXT:addi 3, 1, -8
+; CHECK-AIX-NEXT:.align 5
+; CHECK-AIX-NEXT:  L..BB0_1: # %do.body
+; CHECK-AIX-NEXT:#
+; CHECK-AIX-NEXT:#APP
+; CHECK-AIX-NEXT:ldarx 5, 0, 3
+; CHECK-AIX-NEXT:#NO_APP
+; CHECK-AIX-NEXT:stdcx. 4, 0, 3
+; CHECK-AIX-NEXT:mfocrf 5, 128
+; CHECK-AIX-NEXT:srwi 5, 5, 28
+; CHECK-AIX-NEXT:cmplwi 5, 0
+; CHECK-AIX-NEXT:beq 0, L..BB0_1
+; CHECK-AIX-NEXT:  # %bb.2: # %do.end
+; CHECK-AIX-NEXT:ld 3, -8(1)
+; CHECK-AIX-NEXT:li 4, 55
+; CHECK-AIX-NEXT:cmpldi 3, 0
+; CHECK-AIX-NEXT:li 3, 66
+; CHECK-AIX-NEXT:iseleq 3, 4, 3
+; CHECK-AIX-NEXT:blr
+entry:
+  %x64 = alloca i64, align 8
+  %0 = bitcast i64* %x64 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  store i64 -1, i64* %x64, align 8
+  br label %do.body
+
+do.body:  ; preds = %do.body, %entry
+  %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* 
nonnull %x64)
+  %2 = call i32 @llvm.ppc.stdcx(i8* nonnull %0, i64 0)
+  %tobool.not = icmp eq i32 %2, 0
+  br i1 %tobool.not, label %do.body, label %do.end
+
+do.end:   ; preds = %do.body
+  %3 = load i64, i64* %x64, align 8
+  %cmp = icmp eq i64 %3, 0
+  %. = select i1 %cmp, i32 55, i32 66
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret i32 %.
+}
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: nounwind writeonly
+declare i32 @llvm.ppc.stdcx(i8*, i64)
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)


Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-

[PATCH] D105926: [PowerPC] Extra test case for LDARX

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2a7711f33a04: [PowerPC] Extra test case for LDARX (authored 
by Conanap).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105926/new/

https://reviews.llvm.org/D105926

Files:
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll


Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#APP
+; CHECK-NEXT:ldarx 5, 0, 3
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:stdcx. 4, 0, 3
+; CHECK-NEXT:mfocrf 5, 128
+; CHECK-NEXT:srwi 5, 5, 28
+; CHECK-NEXT:cmplwi 5, 0
+; CHECK-NEXT:beq 0, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %do.end
+; CHECK-NEXT:ld 3, -8(1)
+; CHECK-NEXT:li 4, 55
+; CHECK-NEXT:cmpldi 3, 0
+; CHECK-NEXT:li 3, 66
+; CHECK-NEXT:iseleq 3, 4, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: main:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:li 3, -1
+; CHECK-AIX-NEXT:li 4, 0
+; CHECK-AIX-NEXT:std 3, -8(1)
+; CHECK-AIX-NEXT:addi 3, 1, -8
+; CHECK-AIX-NEXT:.align 5
+; CHECK-AIX-NEXT:  L..BB0_1: # %do.body
+; CHECK-AIX-NEXT:#
+; CHECK-AIX-NEXT:#APP
+; CHECK-AIX-NEXT:ldarx 5, 0, 3
+; CHECK-AIX-NEXT:#NO_APP
+; CHECK-AIX-NEXT:stdcx. 4, 0, 3
+; CHECK-AIX-NEXT:mfocrf 5, 128
+; CHECK-AIX-NEXT:srwi 5, 5, 28
+; CHECK-AIX-NEXT:cmplwi 5, 0
+; CHECK-AIX-NEXT:beq 0, L..BB0_1
+; CHECK-AIX-NEXT:  # %bb.2: # %do.end
+; CHECK-AIX-NEXT:ld 3, -8(1)
+; CHECK-AIX-NEXT:li 4, 55
+; CHECK-AIX-NEXT:cmpldi 3, 0
+; CHECK-AIX-NEXT:li 3, 66
+; CHECK-AIX-NEXT:iseleq 3, 4, 3
+; CHECK-AIX-NEXT:blr
+entry:
+  %x64 = alloca i64, align 8
+  %0 = bitcast i64* %x64 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0)
+  store i64 -1, i64* %x64, align 8
+  br label %do.body
+
+do.body:  ; preds = %do.body, %entry
+  %1 = call i64 asm sideeffect "ldarx $0, ${1:y}", "=r,*Z,~{memory}"(i64* 
nonnull %x64)
+  %2 = call i32 @llvm.ppc.stdcx(i8* nonnull %0, i64 0)
+  %tobool.not = icmp eq i32 %2, 0
+  br i1 %tobool.not, label %do.body, label %do.end
+
+do.end:   ; preds = %do.body
+  %3 = load i64, i64* %x64, align 8
+  %cmp = icmp eq i64 %3, 0
+  %. = select i1 %cmp, i32 55, i32 66
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0)
+  ret i32 %.
+}
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
+
+; Function Attrs: nounwind writeonly
+declare i32 @llvm.ppc.stdcx(i8*, i64)
+
+; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)


Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-check-ldarx-opt.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-AIX
+
+; Function Attrs: nounwind uwtable
+define dso_local signext i32 @main() local_unnamed_addr {
+; CHECK-LABEL: main:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:li 3, -1
+; CHECK-NEXT:li 4, 0
+; CHECK-NEXT:std 3, -8(1)
+; CHECK-NEXT:addi 3, 1, -8
+; CHECK-NEXT:.p2align 5
+; CHECK-NEXT:  .LBB0_1: # %do.body
+; CHECK-NEXT:#
+; CHECK-NEXT:#A

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG3434ac9e3902: [PowerPC] Store, load, move from and to 
registers related builtins (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D105946?vs=359945&id=360259#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
  clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReserve-StoreCond.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
  llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll

Index: llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-prefetch.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX64
+
+declare void @llvm.ppc.dcbtstt(i8*)
+declare void @llvm.ppc.dcbtt(i8*)
+
+@vpa = external local_unnamed_addr global i8*, align 8
+
+define dso_local void @test_dcbtstt() {
+; CHECK-LABEL: test_dcbtstt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtstt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtstt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtstt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtstt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtstt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtstt(i8* %0)
+  ret void
+}
+
+
+define dso_local void @test_dcbtt() {
+; CHECK-LABEL: test_dcbtt:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:ld 3, 0(3)
+; CHECK-NEXT:dcbtt 0, 3
+; CHECK-NEXT:blr
+;
+; CHECK-AIX-LABEL: test_dcbtt:
+; CHECK-AIX:   # %bb.0: # %entry
+; CHECK-AIX-NEXT:lwz 3, L..C0(2) # @vpa
+; CHECK-AIX-NEXT:lwz 3, 0(3)
+; CHECK-AIX-NEXT:dcbtt 0, 3
+; CHECK-AIX-NEXT:blr
+;
+; CHECK-AIX64-LABEL: test_dcbtt:
+; CHECK-AIX64:   # %bb.0: # %entry
+; CHECK-AIX64-NEXT:ld 3, L..C0(2) # @vpa
+; CHECK-AIX64-NEXT:ld 3, 0(3)
+; CHECK-AIX64-NEXT:dcbtt 0, 3
+; CHECK-AIX64-NEXT:blr
+entry:
+  %0 = load i8*, i8** @vpa, align 8
+  tail call void @llvm.ppc.dcbtt(i8* %0)
+  ret void
+}
Index: llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- /dev/null
+++ llvm/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mftbu()
+declare i32 @llvm.ppc.mfmsr()
+
+define dso_local zeroext i32 @test_mftbu() {
+; CHECK-LABEL: test_mftbu:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mftbu 3
+; CHECK-NEXT:clrldi 3, 3, 32
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mftbu:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:mftbu 3
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = 

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

I'm aware of the getting target issue; the fix will be up soon.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106130: [PowerPC] Implemented mtmsr, mfspr, mtspr Builtins

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2fd1520247de: [PowerPC] Implemented mtmsr, mfspr, mtspr 
Builtins (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D106130?vs=359424&id=360304#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106130/new/

https://reviews.llvm.org/D106130

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Basic/Targets/PPC.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-error.c
  clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-mfspr-mtspr-64bit-only.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.ll
@@ -3,13 +3,16 @@
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
 ; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-32BIT
 
 declare i32 @llvm.ppc.mftbu()
 declare i32 @llvm.ppc.mfmsr()
+declare void @llvm.ppc.mtmsr(i32)
+
+@ula = external local_unnamed_addr global i64, align 8
 
 define dso_local zeroext i32 @test_mftbu() {
 ; CHECK-LABEL: test_mftbu:
@@ -44,3 +47,21 @@
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
+
+define dso_local void @test_mtmsr() {
+; CHECK-LABEL: test_mtmsr:
+; CHECK:mtmsr 3
+; CHECK-NEXT:blr
+;
+; CHECK-32BIT-LABEL: test_mtmsr:
+; CHECK-32BIT:   # %bb.0: # %entry
+; CHECK-32BIT-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-32BIT-NEXT:lwz 3, 4(3)
+; CHECK-32BIT-NEXT:mtmsr 3, 0
+; CHECK-32BIT-NEXT:blr
+entry:
+  %0 = load i64, i64* @ula, align 8
+  %conv = trunc i64 %0 to i32
+  call void @llvm.ppc.mtmsr(i32 %conv)
+  ret void
+}
Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-mfspr-mtspr-aix32.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN:   -mcpu=pwr7 < %s | FileCheck %s
+
+declare i32 @llvm.ppc.mfspr.i32(i32 immarg)
+declare void @llvm.ppc.mtspr.i32(i32 immarg, i32)
+
+@ula = external dso_local global i32, align 4
+
+define dso_local i32 @test_mfxer() {
+; CHECK-LABEL: test_mfxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 1)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mflr() {
+; CHECK-LABEL: test_mflr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 8
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 8)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfctr() {
+; CHECK-LABEL: test_mfctr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 9
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 9)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr() {
+; CHECK-LABEL: test_mfppr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 896
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 896)
+  ret i32 %0
+}
+
+define dso_local i32 @test_mfppr32() {
+; CHECK-LABEL: test_mfppr32:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:mfspr 3, 898
+; CHECK-NEXT:blr
+entry:
+  %0 = call i32 @llvm.ppc.mfspr.i32(i32 898)
+  ret i32 %0
+}
+
+define dso_local void @test_mtxer() {
+; CHECK-LABEL: test_mtxer:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-NEXT:lwz 3, 0(3)
+; CHECK-NEXT:mtxer 3
+; CHECK-NEXT:blr
+entry:
+  %0 = load i32, i32* @ula, align 8
+  tail call void @llvm.ppc.mtspr.i32(i32 1, i32 %0)
+  ret void
+}
+
+define dso_local void @test_mtlr() {
+; CHECK-LABEL: test_mtlr:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lwz 3, L..C0(2) # @ula
+; CHECK-NEXT:lwz 3, 0(3)
+; CHECK-NEXT:mtspr 8, 3
+; CHECK-NEXT:blr
+entry:
+  %0 = load i32, i32* @ula, align 8
+  tail call void @llvm.ppc.mtspr.i32(i32 8, i32 %0)

[PATCH] D105946: [PowerPC] Store, load, move from and to registers related builtins

2021-07-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

issue should be fixed now; pushed with this: 
https://reviews.llvm.org/D106130#change-PZi4uueeCg9i
(I just had to move the test files into the `PowerPC` folder).

Will continue to monitor


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105946/new/

https://reviews.llvm.org/D105946

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106484: [PowerPC] Add PowerPC "__stbcx" builtin and intrinsic for XL compatibility

2021-07-22 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/Sema/SemaChecking.cpp:3437
   case PPC::BI__builtin_ppc_icbt:
+  case PPC::BI__builtin_ppc_stbcx:
 return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",

would prefer if this joined the list of `sthcx`, `lharx` and `lbarx` right 
below this case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106484/new/

https://reviews.llvm.org/D106484

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106757: [PowerPC] Implement partial vector ld/st builtins for XL compatibility

2021-07-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/Headers/altivec.h:3151
+#else
+#define __vec_ldrmb __builtin_vsx_ldrmb
+#define __vec_strmb __builtin_vsx_strmb

I believe the preference is to have this defined in 
`clang/lib/Basic/Targets/PPC.cpp` under `defineXLCompatMacros`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106757/new/

https://reviews.llvm.org/D106757

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106757: [PowerPC] Implement partial vector ld/st builtins for XL compatibility

2021-07-24 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

do we need an IR -> ASM test case as well?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106757/new/

https://reviews.llvm.org/D106757

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106757: [PowerPC] Implement partial vector ld/st builtins for XL compatibility

2021-07-25 Thread Albion Fung via Phabricator via cfe-commits
Conanap accepted this revision.
Conanap added a comment.
This revision is now accepted and ready to land.

Thanks for answering the Qs. LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106757/new/

https://reviews.llvm.org/D106757

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106817: [PowerPC] Changed sema checking range for tdw td builtin

2021-07-26 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Herald added subscribers: steven.zhang, shchenz, nemanjai.
Conanap requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

To match xlc behaviour and definition in the PowerPC ISA3.1,
it is a better idea to have the compiler produce an error when a
0 is passed to the builtin. This patch changes the accepted range
from 0 to 31 to 1 to 31.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106817

Files:
  clang/lib/Sema/SemaChecking.cpp


Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:


Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106817: [PowerPC] Changed sema checking range for tdw td builtin

2021-07-26 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 361754.
Conanap added a comment.
Herald added a subscriber: kbarton.

Updated test cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106817/new/

https://reviews.llvm.org/D106817

Files:
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-error.c


Index: clang/test/CodeGen/builtins-ppc-xlcompat-error.c
===
--- clang/test/CodeGen/builtins-ppc-xlcompat-error.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat-error.c
@@ -17,9 +17,11 @@
 
 void test_trap(void) {
 #ifdef __PPC64__
-  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the 
valid range [0, 31]}}
+  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the 
valid range [1, 31]}}
+  __tdw(lla, llb, 0); //expected-error {{argument value 0 is outside the valid 
range [1, 31]}}
 #endif
-  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid 
range [0, 31]}}
+  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid 
range [1, 31]}}
+  __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid 
range [1, 31]}}
 }
 
 void test_builtin_ppc_rldimi() {
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:


Index: clang/test/CodeGen/builtins-ppc-xlcompat-error.c
===
--- clang/test/CodeGen/builtins-ppc-xlcompat-error.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat-error.c
@@ -17,9 +17,11 @@
 
 void test_trap(void) {
 #ifdef __PPC64__
-  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the valid range [0, 31]}}
+  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the valid range [1, 31]}}
+  __tdw(lla, llb, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 #endif
-  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid range [0, 31]}}
+  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid range [1, 31]}}
+  __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 }
 
 void test_builtin_ppc_rldimi() {
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106817: [PowerPC] Changed sema checking range for tdw td builtin

2021-07-26 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG18526b0d661f: [PowerPC] Changed sema checking range for tdw 
td builtin (authored by Conanap).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106817/new/

https://reviews.llvm.org/D106817

Files:
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-xlcompat-error.c


Index: clang/test/CodeGen/builtins-ppc-xlcompat-error.c
===
--- clang/test/CodeGen/builtins-ppc-xlcompat-error.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat-error.c
@@ -17,9 +17,11 @@
 
 void test_trap(void) {
 #ifdef __PPC64__
-  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the 
valid range [0, 31]}}
+  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the 
valid range [1, 31]}}
+  __tdw(lla, llb, 0); //expected-error {{argument value 0 is outside the valid 
range [1, 31]}}
 #endif
-  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid 
range [0, 31]}}
+  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid 
range [1, 31]}}
+  __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid 
range [1, 31]}}
 }
 
 void test_builtin_ppc_rldimi() {
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:


Index: clang/test/CodeGen/builtins-ppc-xlcompat-error.c
===
--- clang/test/CodeGen/builtins-ppc-xlcompat-error.c
+++ clang/test/CodeGen/builtins-ppc-xlcompat-error.c
@@ -17,9 +17,11 @@
 
 void test_trap(void) {
 #ifdef __PPC64__
-  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the valid range [0, 31]}}
+  __tdw(lla, llb, 50); //expected-error {{argument value 50 is outside the valid range [1, 31]}}
+  __tdw(lla, llb, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 #endif
-  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid range [0, 31]}}
+  __tw(ia, ib, 50); //expected-error {{argument value 50 is outside the valid range [1, 31]}}
+  __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}}
 }
 
 void test_builtin_ppc_rldimi() {
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3395,7 +3395,7 @@
  return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   case PPC::BI__builtin_ppc_tw:
   case PPC::BI__builtin_ppc_tdw:
-return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31);
   case PPC::BI__builtin_ppc_cmpeqb:
   case PPC::BI__builtin_ppc_setb:
   case PPC::BI__builtin_ppc_maddhd:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D85453: [PowerPC] Implement __int128 vector divide operations

2020-09-15 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG05aa997d511e: [PowerPC] Implement __int128 vector divide 
operations (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D85453?vs=283671&id=291999#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85453/new/

https://reviews.llvm.org/D85453

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-divide.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
===
--- llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -76,6 +76,24 @@
   ret <4 x i32> %div
 }
 
+define <1 x i128> @test_vdivsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivsq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vdivsq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = sdiv <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vdivuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vdivuq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vdivuq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = udiv <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vdivesd:
 ; CHECK:   # %bb.0: # %entry
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1285,9 +1285,11 @@
[(set v1i128:$vD, (int_ppc_altivec_vmsumcud
  v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
   def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vdivsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vdivsq $vD, $vA, $vB", IIC_VecGeneral,
+[(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vdivuq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vdivuq $vD, $vA, $vB", IIC_VecGeneral,
+[(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
   def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
  "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>;
   def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -888,6 +888,8 @@
   setOperationAction(ISD::SREM, MVT::v2i64, Legal);
   setOperationAction(ISD::UREM, MVT::v4i32, Legal);
   setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+  setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
+  setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
 }
 
 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,6 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
+vector signed __int128 vsi128a, vsi128b;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -69,6 +70,18 @@
   return vec_div(vulla, vullb);
 }
 
+vector unsigned __int128 test_vec_div_u128(void) {
+  // CHECK: udiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_div_s128(void) {
+  // CHECK: sdiv <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_div(vsi128a, vsi128b);
+}
+
 vector signed int test_vec_dive_si(void) {
   // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}})
   // CHECK-NEXT: ret <4 x i32>
Index: clang/lib/Headers/altivec.h
===
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3368,6 +3368,18 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a / __b;
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_div(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a / __b;
+}
+#endif __POWER10_VECTOR__
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
__

[PATCH] D87729: [PowerPC] Implement the 128-bit Vector Divide Extended Builtins in Clang/LLVM

2020-09-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap requested changes to this revision.
Conanap added a comment.
This revision now requires changes to proceed.

Looks like there's some unit test failures; could you double check?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87729/new/

https://reviews.llvm.org/D87729

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87729: [PowerPC] Implement the 128-bit Vector Divide Extended Builtins in Clang/LLVM

2020-09-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap accepted this revision.
Conanap added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87729/new/

https://reviews.llvm.org/D87729

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87804: [PowerPC][Power10] Implement Vector signed/unsigned __int128 overloads for the comparison builtins

2020-09-16 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, PowerPC, clang.
Herald added a subscriber: kbarton.
Conanap requested review of this revision.

This patch implements Vector signed/unsigned __int128 overloads for the 
comparison builtins.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D87804

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/vec_cmpq.ll

Index: llvm/test/CodeGen/PowerPC/vec_cmpq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/vec_cmpq.ll
@@ -0,0 +1,233 @@
+; Test the quadword comparison instructions that were added in POWER10.
+;
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 < %s | FileCheck %s
+define <1 x i128> @v1si128_cmp(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <1 x i128> %x, %y
+   %result = sext <1 x i1> %cmp to <1 x i128>
+   ret <1 x i128> %result
+; CHECK-LABEL: v1si128_cmp:
+; CHECK: vcmpequq 2, 2, 3
+}
+
+define <2 x i128> @v2si128_cmp(<2 x i128> %x, <2 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <2 x i128> %x, %y
+   %result = sext <2 x i1> %cmp to <2 x i128>
+   ret <2 x i128> %result
+; CHECK-LABEL: v2si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i128> @v4si128_cmp(<4 x i128> %x, <4 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <4 x i128> %x, %y
+   %result = sext <4 x i1> %cmp to <4 x i128>
+   ret <4 x i128> %result
+; CHECK-LABEL: v4si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i128> @v8si128_cmp(<8 x i128> %x, <8 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <8 x i128> %x, %y
+   %result = sext <8 x i1> %cmp to <8 x i128>
+   ret <8 x i128> %result
+; CHECK-LABEL: v8si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i128> @v16si128_cmp(<16 x i128> %x, <16 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <16 x i128> %x, %y
+   %result = sext <16 x i1> %cmp to <16 x i128>
+   ret <16 x i128> %result
+; CHECK-LABEL: v16si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Greater than signed
+define <1 x i128> @v1si128_cmp_gt(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+   %cmp = icmp sgt <1 x i128> %x, %y
+   %result = sext <1 x i1> %cmp to <1 x i128>
+   ret <1 x i128> %result
+; CHECK-LABEL: v1si128_cmp_gt
+; CHECK: vcmpgtsq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <2 x i128> @v2si128_cmp_gt(<2 x i128> %x, <2 x i128> %y) nounwind readnone {
+   %cmp = icmp sgt <2 x i128> %x, %y
+   %result = sext <2 x i1> %cmp to <2 x i128>
+   ret <2 x i128> %result
+; CHECK-LABEL: v2si128_cmp_gt
+; CHECK: vcmpgtsq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i128> @v4si128_cmp_gt(<4 x i128> %x, <4 x i128> %y) nounwind readnone {
+   %cmp = icmp sgt <4 x i128> %x, %y
+   %result = sext <4 x i1> 

[PATCH] D87394: [PowerPC][Power10] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

2020-09-17 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/Headers/altivec.h:17394
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mod(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vmodsq(__a, __b);

amyk wrote:
> nit: Move these under the existing `vec_mod` builtins.
> Also, is it possible for these to be open coded instead? We have `vec_mod` 
> for other types that are open coded. 
> 
Probably? Although the downsteram impementation is actually not open coded. We 
can still change it though.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87394/new/

https://reviews.llvm.org/D87394

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84962: [PowerPC] Correct cpsgn's behaviour on PowerPC to match that of the ABI

2020-09-17 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 292589.
Conanap added a comment.
Herald added a subscriber: kbarton.

Added extra test case to demonstrate that the arguments will be reversed


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84962/new/

https://reviews.llvm.org/D84962

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-vsx.c


Index: clang/test/CodeGen/builtins-ppc-vsx.c
===
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -1832,3 +1832,10 @@
 // CHECK-NEXT-LE: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 0
 
 }
+
+void testVectorCpsgn(vector float a, vector float b) {
+// CHECK-LABEL: testVectorCpsgn
+// %5 is b, %4 is a
+// CHECK: %6 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %5, <4 x 
float> %4) 
+  vec_cpsgn(a, b);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14236,7 +14236,7 @@
 Value *Y = EmitScalarExpr(E->getArg(1));
 ID = Intrinsic::copysign;
 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
-return Builder.CreateCall(F, {X, Y});
+return Builder.CreateCall(F, {Y, X});
   }
   // Rounding/truncation
   case PPC::BI__builtin_vsx_xvrspip:


Index: clang/test/CodeGen/builtins-ppc-vsx.c
===
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -1832,3 +1832,10 @@
 // CHECK-NEXT-LE: %{{[0-9]+}} = extractelement <2 x i64> %[[V1]], i32 0
 
 }
+
+void testVectorCpsgn(vector float a, vector float b) {
+// CHECK-LABEL: testVectorCpsgn
+// %5 is b, %4 is a
+// CHECK: %6 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %5, <4 x float> %4) 
+  vec_cpsgn(a, b);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14236,7 +14236,7 @@
 Value *Y = EmitScalarExpr(E->getArg(1));
 ID = Intrinsic::copysign;
 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
-return Builder.CreateCall(F, {X, Y});
+return Builder.CreateCall(F, {Y, X});
   }
   // Rounding/truncation
   case PPC::BI__builtin_vsx_xvrspip:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87394: [PowerPC][Power10] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

2020-09-18 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 292901.
Conanap added a comment.

Open coded VMODS/UQ instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87394/new/

https://reviews.llvm.org/D87394

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  clang/test/CodeGen/builtins-ppc-p9vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrAltivec.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
  llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test vector sign extend builtins.
+
+declare <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64>) nounwind readnone
+
+define <4 x i32> @test_vextsb2w(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2w:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsb2w v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsb2d(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsb2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8> %x)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @test_vextsh2w(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2w:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsh2w v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsh2d(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsh2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16> %x)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vextsw2d(<4 x i32> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsw2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsw2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32> %x)
+  ret <2 x i64> %tmp
+}
+
+define <1 x i128> @test_vextsd2q(<2 x i64> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsd2q:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsd2q v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64> %x)
+  ret <1 x i128> %tmp
+}
Index: llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
===
--- llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -10,6 +10,28 @@
 ; The vector modulo instructions operate on signed and unsigned words
 ; and doublewords.
 
+; The vector modulo instructions operate on signed and unsigned words,
+; doublewords and 128-bit values.
+
+
+define <1 x i128> @test_vmodsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmodsq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmodsq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = srem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmoduq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmoduq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmoduq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = urem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmodud:
 ; CHECK:   # %bb.0: # %entry
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1301,11 +1301,14 @@
   def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
   def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vmodsq $vD, $vA, $vB", IIC_VecG

[PATCH] D87394: [PowerPC][Power10] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

2020-09-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 293076.
Conanap added a comment.

Open Coded instead


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87394/new/

https://reviews.llvm.org/D87394

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  clang/test/CodeGen/builtins-ppc-p9vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrAltivec.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
  llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test vector sign extend builtins.
+
+declare <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64>) nounwind readnone
+
+define <4 x i32> @test_vextsb2w(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2w:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsb2w v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsb2d(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsb2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8> %x)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @test_vextsh2w(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2w:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsh2w v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsh2d(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsh2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16> %x)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vextsw2d(<4 x i32> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsw2d:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsw2d v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32> %x)
+  ret <2 x i64> %tmp
+}
+
+define <1 x i128> @test_vextsd2q(<2 x i64> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsd2q:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vextsd2q v2, v2
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64> %x)
+  ret <1 x i128> %tmp
+}
Index: llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
===
--- llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -10,6 +10,28 @@
 ; The vector modulo instructions operate on signed and unsigned words
 ; and doublewords.
 
+; The vector modulo instructions operate on signed and unsigned words,
+; doublewords and 128-bit values.
+
+
+define <1 x i128> @test_vmodsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmodsq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmodsq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = srem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmoduq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmoduq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmoduq v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = urem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmodud:
 ; CHECK:   # %bb.0: # %entry
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1338,11 +1338,14 @@
   def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
   def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-"vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+"vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+[(set v1i128:$vD

[PATCH] D87394: [PowerPC][Power10] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

2020-09-20 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: clang/lib/Headers/altivec.h:17394
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mod(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vmodsq(__a, __b);

Conanap wrote:
> amyk wrote:
> > nit: Move these under the existing `vec_mod` builtins.
> > Also, is it possible for these to be open coded instead? We have `vec_mod` 
> > for other types that are open coded. 
> > 
> Probably? Although the downsteram impementation is actually not open coded. 
> We can still change it though.
> Probably? Although the downsteram impementation is actually not open coded. 
> We can still change it though.




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87394/new/

https://reviews.llvm.org/D87394

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87671: [PowerPC] Implement Vector String Isolate Builtins in Clang/LLVM

2020-09-21 Thread Albion Fung via Phabricator via cfe-commits
Conanap accepted this revision.
Conanap added a comment.
This revision is now accepted and ready to land.

lgtm


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87671/new/

https://reviews.llvm.org/D87671

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87910: [PowerPC] Implement the 128-bit vec_[all|any]_[eq | ne | lt | gt | le | ge] builtins in Clang/LLVM

2020-09-21 Thread Albion Fung via Phabricator via cfe-commits
Conanap accepted this revision.
Conanap added a comment.
This revision is now accepted and ready to land.

lgtm


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87910/new/

https://reviews.llvm.org/D87910

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86819: [PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins

2020-09-22 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 293474.
Conanap marked 2 inline comments as done.
Conanap added a comment.

Changed implementation for vrlqnm as per Nemanja


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86819/new/

https://reviews.llvm.org/D86819

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the builtins for vector rotate instructions
+; on Power10.
+
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
+; CHECK-LABEL: test_vrlq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vrlq v2, v3, v2
+; CHECK-NEXT:blr
+  %shl.i = shl <1 x i128> %y, %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> %y, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+  %shl.i = shl <1 x i128> , %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> , %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_non_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+  %shl.i = shl <1 x i128> , %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> , %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqmi v3, v2, v4
+; CHECK-NEXT:vmr v2, v3
+; CHECK-NEXT:blr
+entry:
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqnm v2, v2, v3
+; CHECK-NEXT:xxland v2, v2, v4
+; CHECK-NEXT:blr
+entry:
+  %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+  %tmp = and <1 x i128> %0, %c
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1446,19 +1446,25 @@
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
-  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
-  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
-(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
-"vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
-RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
   def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
   def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
   def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
-  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+   [(set v1i128:$vD,
+   (int_ppc_altivec_vrlqnm v1i128:$vA,
+   v1i128:$vB))]>;
+  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+"vrlqmi $vD, $vA, $vB", IIC_VecFP,
+[(set v1i128:$vD,
+   (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+  

[PATCH] D87921: Fix -funique-internal-linkage-names to work with -O2 and new pass manager

2020-09-22 Thread Albion Fung via Phabricator via cfe-commits
Conanap added a comment.

It looks like this commit is causing a few failures on nearly all PPC bots and 
a sanitizer bot; would it be possible to revert this commit for now until the 
issue is resolved?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87921/new/

https://reviews.llvm.org/D87921

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87394: [PowerPC][Power10] Implementation of 128-bit Binary Vector Mod and Sign Extend builtins

2020-09-23 Thread Albion Fung via Phabricator via cfe-commits
Conanap closed this revision.
Conanap added a comment.

Committed with Nemanja's comments addressed in the commit. Hash 
d7eb917a7cb793f49e16841fc24826b988dd5c8f 



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87394/new/

https://reviews.llvm.org/D87394

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87804: [PowerPC][Power10] Implement Vector signed/unsigned __int128 overloads for the comparison builtins

2020-09-23 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Conanap marked an inline comment as done.
Closed by commit rG88cdbeab417c: [PowerPC] Implement Vector signed/unsigned 
__int128 overloads for the… (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D87804?vs=292371&id=293853#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87804/new/

https://reviews.llvm.org/D87804

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/vec_cmpq.ll

Index: llvm/test/CodeGen/PowerPC/vec_cmpq.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/vec_cmpq.ll
@@ -0,0 +1,250 @@
+; Test the quadword comparison instructions that were added in POWER10.
+;
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 < %s | FileCheck %s
+define <1 x i128> @v1si128_cmp(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <1 x i128> %x, %y
+   %result = sext <1 x i1> %cmp to <1 x i128>
+   ret <1 x i128> %result
+; CHECK-LABEL: v1si128_cmp:
+; CHECK: vcmpequq 2, 2, 3
+}
+
+define <2 x i128> @v2si128_cmp(<2 x i128> %x, <2 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <2 x i128> %x, %y
+   %result = sext <2 x i1> %cmp to <2 x i128>
+   ret <2 x i128> %result
+; CHECK-LABEL: v2si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define <4 x i128> @v4si128_cmp(<4 x i128> %x, <4 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <4 x i128> %x, %y
+   %result = sext <4 x i1> %cmp to <4 x i128>
+   ret <4 x i128> %result
+; CHECK-LABEL: v4si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define <8 x i128> @v8si128_cmp(<8 x i128> %x, <8 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <8 x i128> %x, %y
+   %result = sext <8 x i1> %cmp to <8 x i128>
+   ret <8 x i128> %result
+; CHECK-LABEL: v8si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define <16 x i128> @v16si128_cmp(<16 x i128> %x, <16 x i128> %y) nounwind readnone {
+   %cmp = icmp eq <16 x i128> %x, %y
+   %result = sext <16 x i1> %cmp to <16 x i128>
+   ret <16 x i128> %result
+; CHECK-LABEL: v16si128_cmp
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+; Greater than signed
+define <1 x i128> @v1si128_cmp_gt(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+   %cmp = icmp sgt <1 x i128> %x, %y
+   %result = sext <1 x i1> %cmp to <1 x i128>
+   ret <1 x i128> %result
+; CHECK-LABEL: v1si128_cmp_gt
+; CHECK: vcmpgtsq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define <2 x i128> @v2si128_cmp_gt(<2 x i128> %x, <2 x i128> %y) nounwind readnone {
+   %cmp = icmp sgt <2 x i128> %x, %y
+   %result = sext <2 x i1> %cmp to <2 x i128>
+   ret <2 x i128> %result
+; CHECK-LABEL: v2si128_cmp_gt
+; CHECK: vcmpgtsq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: v

[PATCH] D87910: [PowerPC] Implement the 128-bit vec_[all|any]_[eq | ne | lt | gt | le | ge] builtins in Clang/LLVM

2020-09-23 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2e7117f84770: [PowerPC] Implement the 128-bit 
vec_[all|any]_[eq | ne | lt | gt | le | ge]… (authored by amyk, committed by 
Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D87910?vs=293161&id=293854#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87910/new/

https://reviews.llvm.org/D87910

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/vec_cmpq.ll

Index: llvm/test/CodeGen/PowerPC/vec_cmpq.ll
===
--- llvm/test/CodeGen/PowerPC/vec_cmpq.ll
+++ llvm/test/CodeGen/PowerPC/vec_cmpq.ll
@@ -248,3 +248,31 @@
 ; CHECK: vcmpgtuq {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK: blr
 }
+
+declare i32 @llvm.ppc.altivec.vcmpequq.p(i32, <1 x i128>, <1 x i128>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtsq.p(i32, <1 x i128>, <1 x i128>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>) nounwind readnone
+
+define i32 @test_vcmpequq_p(<1 x i128> %x, <1 x i128> %y) {
+  %tmp = tail call i32 @llvm.ppc.altivec.vcmpequq.p(i32 2, <1 x i128> %x, <1 x i128> %y)
+  ret i32 %tmp
+; CHECK-LABEL: test_vcmpequq_p:
+; CHECK: vcmpequq. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define i32 @test_vcmpgtsq_p(<1 x i128> %x, <1 x i128> %y) {
+  %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtsq.p(i32 2, <1 x i128> %x, <1 x i128> %y)
+  ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtsq_p
+; CHECK: vcmpgtsq. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
+
+define i32 @test_vcmpgtuq_p(<1 x i128> %x, <1 x i128> %y) {
+  %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %x, <1 x i128> %y)
+  ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtuq_p
+; CHECK: vcmpgtuq. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: blr
+}
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10358,6 +10358,26 @@
 else
   return false;
 break;
+  case Intrinsic::ppc_altivec_vcmpequq_p:
+  case Intrinsic::ppc_altivec_vcmpgtsq_p:
+  case Intrinsic::ppc_altivec_vcmpgtuq_p:
+if (!Subtarget.isISA3_1())
+  return false;
+switch (IntrinsicID) {
+default:
+  llvm_unreachable("Unknown comparison intrinsic.");
+case Intrinsic::ppc_altivec_vcmpequq_p:
+  CompareOpc = 455;
+  break;
+case Intrinsic::ppc_altivec_vcmpgtsq_p:
+  CompareOpc = 903;
+  break;
+case Intrinsic::ppc_altivec_vcmpgtuq_p:
+  CompareOpc = 647;
+  break;
+}
+isDot = true;
+break;
   }
   return true;
 }
@@ -15224,16 +15244,19 @@
 case Intrinsic::ppc_altivec_vcmpequh_p:
 case Intrinsic::ppc_altivec_vcmpequw_p:
 case Intrinsic::ppc_altivec_vcmpequd_p:
+case Intrinsic::ppc_altivec_vcmpequq_p:
 case Intrinsic::ppc_altivec_vcmpgefp_p:
 case Intrinsic::ppc_altivec_vcmpgtfp_p:
 case Intrinsic::ppc_altivec_vcmpgtsb_p:
 case Intrinsic::ppc_altivec_vcmpgtsh_p:
 case Intrinsic::ppc_altivec_vcmpgtsw_p:
 case Intrinsic::ppc_altivec_vcmpgtsd_p:
+case Intrinsic::ppc_altivec_vcmpgtsq_p:
 case Intrinsic::ppc_altivec_vcmpgtub_p:
 case Intrinsic::ppc_altivec_vcmpgtuh_p:
 case Intrinsic::ppc_altivec_vcmpgtuw_p:
 case Intrinsic::ppc_altivec_vcmpgtud_p:
+case Intrinsic::ppc_altivec_vcmpgtuq_p:
   Known.Zero = ~1U;  // All bits but the low one are known to be zero.
   break;
 }
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -370,6 +370,18 @@
   def int_ppc_altivec_vcmpgtuq : GCCBuiltin<"__builtin_altivec_vcmpgtuq">,
   Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty],
 [IntrNoMem]>;
+  def int_ppc_altivec_vcmpequq_p : GCCBuiltin<"__builtin_altivec_vcmpequq_p">,
+  Intrinsic<[llvm_i32_ty],
+[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+[IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtsq_p : GCCBuiltin<"__builtin_altivec_vcmpgtsq_p">,
+  Intrinsic<[llvm_i32_ty],
+[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+[IntrNoMem]>;
+  def int_ppc_altivec_vcmpgtuq_p : GCCBuiltin<"__builtin_altivec_vcmpgtuq_p">,
+  Intrinsic<[llvm_i32_ty],
+[llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty],
+  

[PATCH] D82502: [PowerPC] Implement Load VSX Vector and Sign Extend and Zero Extend

2020-08-28 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Conanap marked an inline comment as done.
Closed by commit rG331dcc43eac2: [PowerPC] Implemented Vector Load with Zero 
and Signed Extend Builtins (authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D82502?vs=283061&id=288637#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -239,3 +239,181 @@
   store i64 %conv, i64* %add.ptr, align 8
   ret void
 }
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lxvrbx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:lxvrbx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_short:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lxvrhx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_short:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 1
+; CHECK-O0-NEXT:lxvrhx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_word:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 2
+; CHECK-NEXT:lxvrwx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_word:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 2
+; CHECK-O0-NEXT:lxvrwx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LABEL: vec_xl_zext_dw:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 3
+; CHECK-NEXT:lxvrdx v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_dw:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:sldi r3, r3, 3
+; CHECK-O0-NEXT:lxvrdx vs0, r4, r3
+; CHECK-O0-NEXT:xxlor v2, vs0, vs0
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
+; CHECK-LABEL: vec_xl_sext_b:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:lbzx r3, r4, r3
+; CHECK-NEXT:extsb r3, r3
+; CHECK-NEXT:sradi r4, r3, 63
+; CHECK-NEXT:mtvsrdd v2, r4, r3
+; CHECK-NEXT:blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_b:
+; CHECK-O0:   # %bb.0: # %entry
+; CHECK-O0-NEXT:lbzx r3, r4, r3
+; CHECK-O0-NEXT:extsb r3, r3
+; CHECK-O0-NEXT:sradi r4, r3, 63
+; CHECK-O0-NEXT:mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT:blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = sext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
+; CHECK-LABEL: vec_xl_sext_h:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:sldi r3, r3, 1
+; CHECK-NEXT:lhax r3, r4, r3
+; CHECK-NE

[PATCH] D86819: [PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins

2020-08-28 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: PowerPC, saghir, nemanjai, hfinkel.
Conanap added projects: LLVM, clang, PowerPC.
Herald added subscribers: danielkiss, steven.zhang, kbarton.
Conanap requested review of this revision.

This patch implements 128-bit Binary Vector Rotate builtins for PowerPC10.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86819

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the builtins for vector rotate instructions
+; on Power10.
+
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vrlq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vrlq v2, v3, v2
+; CHECK-NEXT:blr
+  %shl.i = shl <1 x i128> %y, %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> %y, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqmi v3, v2, v4
+; CHECK-NEXT:vmr v2, v3
+; CHECK-NEXT:blr
+entry:
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqnm v2, v2, v3
+; CHECK-NEXT:xxland v2, v2, v4
+; CHECK-NEXT:blr
+entry:
+  %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+  %tmp = and <1 x i128> %0, %c
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1285,19 +1285,25 @@
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
-  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
-  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
-(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
-"vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
-RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
   def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
   def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
   def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
-  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+   [(set v1i128:$vD,
+   (int_ppc_altivec_vrlqnm v1i128:$vA,
+   v1i128:$vB))]>;
+  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+"vrlqmi $vD, $vA, $vB", IIC_VecFP,
+[(set v1i128:$vD,
+   (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+   v1i128:$vDi))]>,
+RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
 }
 
 // Anonymous Patterns //
@@ -1344,6 +1350,9 @@
  (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
   def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
  (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
+  def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+(v1i128 (VRLQ v1i128:$vA, v1i128:

[PATCH] D86819: [PowerPC][Power10] Implementation of 128-bit Binary Vector Rotate builtins

2020-08-31 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 289064.
Conanap added a comment.

Included extra test cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86819/new/

https://reviews.llvm.org/D86819

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the builtins for vector rotate instructions
+; on Power10.
+
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
+; CHECK-LABEL: test_vrlq:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vrlq v2, v3, v2
+; CHECK-NEXT:blr
+  %shl.i = shl <1 x i128> %y, %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> %y, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+  %shl.i = shl <1 x i128> , %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> , %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_non_mult8:
+; CHECK: # %bb.0:
+; CHECK: vrlq v2, v3, v2
+; CHECK-NEXT: blr
+  %shl.i = shl <1 x i128> , %x
+  %sub.i = sub <1 x i128> , %x
+  %lshr.i = lshr <1 x i128> , %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqmi v3, v2, v4
+; CHECK-NEXT:vmr v2, v3
+; CHECK-NEXT:blr
+entry:
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:vrlqnm v2, v2, v3
+; CHECK-NEXT:xxland v2, v2, v4
+; CHECK-NEXT:blr
+entry:
+  %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+  %tmp = and <1 x i128> %0, %c
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1285,19 +1285,25 @@
"vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
"vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
-  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
-  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
-(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
-"vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
-RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
   def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
   def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
   def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
-  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+   [(set v1i128:$vD,
+   (int_ppc_altivec_vrlqnm v1i128:$vA,
+   v1i128:$vB))]>;
+  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+(ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+"vrlqmi $vD, $vA, $vB", IIC_VecFP,
+[(set v1i128:$vD,
+   (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+

[PATCH] D83955: [PowerPC][Power10] Implementation of 128-bit Binary Vector Multiply builtins

2020-09-02 Thread Albion Fung via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5d1fe3f903b9: [PowerPC] Implemented Vector Multiply Builtins 
(authored by Conanap).

Changed prior to commit:
  https://reviews.llvm.org/D83955?vs=279637&id=289537#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83955/new/

https://reviews.llvm.org/D83955

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll

Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===
--- llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -10,6 +10,7 @@
 ; This includes the low order and high order versions of vector multiply.
 ; The low order version operates on doublewords, whereas the high order version
 ; operates on signed and unsigned words and doublewords.
+; This file also includes 128 bit vector multiply instructions.
 
 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmulld:
@@ -122,3 +123,54 @@
   %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %mulh
 }
+
+declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone
+
+define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuleud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuleud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmuloud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmuloud v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulesd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulesd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+; CHECK-LABEL: test_vmulosd:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmulosd v2, v2, v3
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
+; CHECK-LABEL: test_vmsumcud:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vmsumcud v2, v2, v3, v4
+; CHECK-NEXT:blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
+  ret <1 x i128> %tmp
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1256,16 +1256,25 @@
   }
 
   def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulesd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuleud $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulosd $vD, $vA, $vB", IIC_VecGeneral, []>;
+ "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
+   v2i64:$vB))]>;
   def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuloud $vD, $vA, $vB", IIC_VecGeneral, []>;
-  def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD),
-   (ins vrrc:$vA, vrr

[PATCH] D82727: [PowerPC] Implement Vector Expand Mask builtins in LLVM/Clang

2020-09-03 Thread Albion Fung via Phabricator via cfe-commits
Conanap added inline comments.



Comment at: llvm/lib/Target/PowerPC/PPCInstrPrefix.td:993
+ [(set v16i8:$vD, 
(int_ppc_altivec_vexpandbm
+  v16i8:$vB))]>;
   def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),

Nit: Please make this indentation inline with the others


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82727/new/

https://reviews.llvm.org/D82727

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   3   >