[PATCH] D82726: [PowerPC][Power10] Implement Vector Count Mask Bits builtins in LLVM/Clang

Amy Kwan via Phabricator via cfe-commits Sun, 28 Jun 2020 13:17:02 -0700

amyk created this revision.
amyk added reviewers: PowerPC, power-llvm-team, kamaub, stefanp.
amyk added projects: LLVM, clang.
Herald added subscribers: shchenz, hiraditya, nemanjai.


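This patch implements the Power10 Vector Count Mask Bits builtins, adding the builtin, intrinsic, and instruction definitions needed to provide the following function prototypes in altivec.h: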

  unsigned long long vec_cntm (vector unsigned char a, const unsigned char mp);
  unsigned long long vec_cntm (vector unsigned short a, const unsigned char mp);
  unsigned long long vec_cntm (vector unsigned int a, const unsigned char mp);
  unsigned long long vec_cntm (vector unsigned long long a, const unsigned char mp);

Depends on D82675 <https://reviews.llvm.org/D82675>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82726

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
  llvm/test/MC/Disassembler/PowerPC/p10insts.txt
  llvm/test/MC/PowerPC/p10.s

Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -18,6 +18,18 @@
 # CHECK-BE: vextractqm 1, 2                       # encoding: [0x10,0x2c,0x16,0x42]
 # CHECK-LE: vextractqm 1, 2                       # encoding: [0x42,0x16,0x2c,0x10]
             vextractqm 1, 2
+# CHECK-BE: vcntmbb 1, 2, 1                       # encoding: [0x10,0x39,0x16,0x42]
+# CHECK-LE: vcntmbb 1, 2, 1                       # encoding: [0x42,0x16,0x39,0x10]
+            vcntmbb 1, 2, 1
+# CHECK-BE: vcntmbh 1, 2, 1                       # encoding: [0x10,0x3b,0x16,0x42]
+# CHECK-LE: vcntmbh 1, 2, 1                       # encoding: [0x42,0x16,0x3b,0x10]
+            vcntmbh 1, 2, 1
+# CHECK-BE: vcntmbw 1, 2, 0                       # encoding: [0x10,0x3c,0x16,0x42]
+# CHECK-LE: vcntmbw 1, 2, 0                       # encoding: [0x42,0x16,0x3c,0x10]
+            vcntmbw 1, 2, 0
+# CHECK-BE: vcntmbd 1, 2, 0                       # encoding: [0x10,0x3e,0x16,0x42]
+# CHECK-LE: vcntmbd 1, 2, 0                       # encoding: [0x42,0x16,0x3e,0x10]
+            vcntmbd 1, 2, 0
 # CHECK-BE: vpdepd 1, 2, 0                        # encoding: [0x10,0x22,0x05,0xcd]
 # CHECK-LE: vpdepd 1, 2, 0                        # encoding: [0xcd,0x05,0x22,0x10]
             vpdepd 1, 2, 0
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -16,6 +16,18 @@
 # CHECK: vextractqm 1, 2
 0x10 0x2c 0x16 0x42
 
+# CHECK: vcntmbb 1, 2, 1
+0x10 0x39 0x16 0x42
+
+# CHECK: vcntmbh 1, 2, 1
+0x10 0x3b 0x16 0x42
+
+# CHECK: vcntmbw 1, 2, 0
+0x10 0x3c 0x16 0x42
+
+# CHECK: vcntmbd 1, 2, 0
+0x10 0x3e 0x16 0x42
+
 # CHECK: vpdepd 1, 2, 0
 0x10 0x22 0x05 0xcd
 
Index: llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -61,3 +61,48 @@
 declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
 declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
 declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
+
+define i64 @test_vcntmbb(<16 x i8> %a) {
+; CHECK-LABEL: test_vcntmbb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcntmbb r3, v2, 1
+; CHECK-NEXT:    blr
+entry:
+  %cnt = tail call i64 @llvm.ppc.altivec.vcntmbb(<16 x i8> %a, i32 1)
+  ret i64 %cnt
+}
+
+define i64 @test_vcntmbh(<8 x i16> %a) {
+; CHECK-LABEL: test_vcntmbh:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcntmbh r3, v2, 0
+; CHECK-NEXT:    blr
+entry:
+  %cnt = tail call i64 @llvm.ppc.altivec.vcntmbh(<8 x i16> %a, i32 0)
+  ret i64 %cnt
+}
+
+define i64 @test_vcntmbw(<4 x i32> %a) {
+; CHECK-LABEL: test_vcntmbw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcntmbw r3, v2, 1
+; CHECK-NEXT:    blr
+entry:
+  %cnt = tail call i64 @llvm.ppc.altivec.vcntmbw(<4 x i32> %a, i32 1)
+  ret i64 %cnt
+}
+
+define i64 @test_vcntmbd(<2 x i64> %a) {
+; CHECK-LABEL: test_vcntmbd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcntmbd r3, v2, 0
+; CHECK-NEXT:    blr
+entry:
+  %cnt = tail call i64 @llvm.ppc.altivec.vcntmbd(<2 x i64> %a, i32 0)
+  ret i64 %cnt
+}
+
+declare i64 @llvm.ppc.altivec.vcntmbb(<16 x i8>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbh(<8 x i16>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbw(<4 x i32>, i32)
+declare i64 @llvm.ppc.altivec.vcntmbd(<2 x i64>, i32)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -225,6 +225,29 @@
   let Inst{21-31} = xo;
 }
 
+// VX-Form: [PO VRT / UIM RB XO].
+// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
+// "/ UIM" (unused bit followed by a 4-bit immediate)
+class VX_VRT5_UIM5_RB5<bits<11> xo, string opc, list<dag> pattern>
+  : VXForm_1<xo, (outs vrrc:$VRT), (ins u4imm:$UIM, g8rc:$RB),
+             !strconcat(opc, " $VRT, $RB, $UIM"), IIC_VecGeneral, pattern>;
+
+class VXForm_RD5_MP_VB5<bits<11> xo, bits<4> eo, dag OOL, dag IOL,
+                        string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+  bit MP;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RD;
+  let Inst{11-14} = eo;
+  let Inst{15} = MP;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
 multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
                                        dag PCRel_IOL, string asmstr,
                                        InstrItinClass itin> {
@@ -586,6 +609,26 @@
                                       "vextractqm $rD, $vB", IIC_VecGeneral,
                                       [(set i32:$rD,
                                       (int_ppc_altivec_vextractqm v1i128:$vB))]>;
+  def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD),
+                                  (ins vrrc:$vB, u1imm:$MP),
+                                  "vcntmbb $rD, $vB, $MP", IIC_VecGeneral,
+                                  [(set i64:$rD,
+                                  (int_ppc_altivec_vcntmbb v16i8:$vB, timm:$MP))]>;
+  def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD),
+                                  (ins vrrc:$vB, u1imm:$MP),
+                                  "vcntmbh $rD, $vB, $MP", IIC_VecGeneral,
+                                  [(set i64:$rD,
+                                  (int_ppc_altivec_vcntmbh v8i16:$vB, timm:$MP))]>;
+  def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD),
+                                  (ins vrrc:$vB, u1imm:$MP),
+                                  "vcntmbw $rD, $vB, $MP", IIC_VecGeneral,
+                                  [(set i64:$rD,
+                                  (int_ppc_altivec_vcntmbw v4i32:$vB, timm:$MP))]>;
+  def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD),
+                                  (ins vrrc:$vB, u1imm:$MP),
+                                  "vcntmbd $rD, $vB, $MP", IIC_VecGeneral,
+                                  [(set i64:$rD,
+                                  (int_ppc_altivec_vcntmbd v2i64:$vB, timm:$MP))]>;
    def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
                          [(set v2i64:$vD,
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -435,6 +435,20 @@
   def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
               Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
 
+  // P10 Vector Count with Mask intrinsics.
+  def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">,
+              Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">,
+              Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">,
+              Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+  def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">,
+              Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
   // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
   def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -43,6 +43,30 @@
   return vec_extractm(vui128a);
 }
 
+unsigned long long test_vec_cntm_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vcntmbb(<16 x i8> %{{.+}}, i32
+  // CHECK-NEXT: ret i64
+  return vec_cntm(vuca, 1);
+}
+
+unsigned long long test_vec_cntm_us(void) {
+  // CHECK: @llvm.ppc.altivec.vcntmbh(<8 x i16> %{{.+}}, i32
+  // CHECK-NEXT: ret i64
+  return vec_cntm(vusa, 0);
+}
+
+unsigned long long test_vec_cntm_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vcntmbw(<4 x i32> %{{.+}}, i32
+  // CHECK-NEXT: ret i64
+  return vec_cntm(vuia, 1);
+}
+
+unsigned long long test_vec_cntm_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vcntmbd(<2 x i64> %{{.+}}, i32
+  // CHECK-NEXT: ret i64
+  return vec_cntm(vulla, 0);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -2297,6 +2297,20 @@
   return __builtin_altivec_vctzd(__a);
 }
 
+/* vec_cntm */
+
+#ifdef __POWER10_VECTOR__
+#define vec_cntm(__a, __mp)                                                    \
+  _Generic((__a), vector unsigned char                                         \
+           : __builtin_altivec_vcntmbb((__a), (unsigned int)(__mp)),           \
+             vector unsigned short                                             \
+           : __builtin_altivec_vcntmbh((__a), (unsigned int)(__mp)),           \
+             vector unsigned int                                               \
+           : __builtin_altivec_vcntmbw((__a), (unsigned int)(__mp)),           \
+             vector unsigned long long                                         \
+           : __builtin_altivec_vcntmbd((__a), (unsigned int)(__mp)))
+#endif /* __POWER10_VECTOR__ */
+
 /* vec_first_match_index */
 
 static __inline__ unsigned __ATTRS_o_ai
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,12 @@
 BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
 BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
 
+// P10 Vector Count with Mask built-ins.
+BUILTIN(__builtin_altivec_vcntmbb, "ULLiV16UcUi", "")
+BUILTIN(__builtin_altivec_vcntmbh, "ULLiV8UsUi", "")
+BUILTIN(__builtin_altivec_vcntmbw, "ULLiV4UiUi", "")
+BUILTIN(__builtin_altivec_vcntmbd, "ULLiV2ULLiUi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82726: [PowerPC][Power10] Implement Vector Count Mask Bits builtins in LLVM/Clang

Reply via email to