This revision was automatically updated to reflect the committed changes.
Closed by commit rG461fd94f004c: [ARM,MVE] Fix predicate types of some
intrinsics (authored by miyuki).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D74838/new/
https://reviews.llvm.org/D74838
Files:
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
clang/test/CodeGen/arm-mve-intrinsics/vmullbq.c
clang/test/CodeGen/arm-mve-intrinsics/vmulltq.c
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulltq.ll
@@ -59,14 +59,14 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
-declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
-declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, i32, <16 x i1>, <8 x i16>) #1
+declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8>, <16 x i8>, i32, i32, <8 x i1>, <8 x i16>) #1
define arm_aapcs_vfpcc <4 x i32> @test_vmulltq_int_m_u16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_m_u16:
@@ -77,14 +77,14 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
-declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
-declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, i32, <8 x i1>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, i32, <4 x i1>, <4 x i32>) #1
define arm_aapcs_vfpcc <2 x i64> @test_vmulltq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_m_s32:
@@ -100,8 +100,6 @@
ret <2 x i64> %2
}
-declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
-
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>) #1
define arm_aapcs_vfpcc <8 x i16> @test_vmulltq_poly_m_p8(<8 x i16> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
@@ -113,12 +111,12 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
-declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <8 x i16>) #1
+declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8>, <16 x i8>, i32, <8 x i1>, <8 x i16>) #1
define arm_aapcs_vfpcc <8 x i16> @test_vmulltq_int_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmulltq_int_x_u8:
@@ -129,8 +127,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -143,8 +141,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -172,8 +170,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vmullbq.ll
@@ -59,16 +59,16 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
-declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
-declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, i32, <16 x i1>, <8 x i16>) #1
+declare <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8>, <16 x i8>, i32, i32, <8 x i1>, <8 x i16>) #1
-define arm_aapcs_vfpcc <4 x i32> @test_vmullbq_int_m_u16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
+define arm_aapcs_vfpcc <4 x i32> @test_vmullbq_int_m_u16(<4 x i32> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) #0 {
; CHECK-LABEL: test_vmullbq_int_m_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
@@ -77,14 +77,14 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
-declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
-declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, i32, <8 x i1>, <4 x i32>) #1
+declare <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, i32, <4 x i1>, <4 x i32>) #1
define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_m_s32:
@@ -100,8 +100,6 @@
ret <2 x i64> %2
}
-declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1
-
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>) #1
define arm_aapcs_vfpcc <8 x i16> @test_vmullbq_poly_m_p8(<8 x i16> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
@@ -113,12 +111,12 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
-declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <8 x i16>) #1
+declare <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8>, <16 x i8>, i32, <8 x i1>, <8 x i16>) #1
define arm_aapcs_vfpcc <8 x i16> @test_vmullbq_int_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vmullbq_int_x_u8:
@@ -129,8 +127,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -143,8 +141,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -172,10 +170,9 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, <8 x i16> %b, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
-declare <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <4 x i32>) #1
-
+declare <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>) #1
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
@@ -998,8 +998,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1012,8 +1012,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1026,8 +1026,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1040,8 +1040,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1054,8 +1054,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 3, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 3, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1068,8 +1068,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1082,8 +1082,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 14, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1096,8 +1096,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1110,8 +1110,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 4, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1124,8 +1124,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1138,8 +1138,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 12, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1152,8 +1152,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1166,8 +1166,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1180,8 +1180,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
ret <8 x i16> %2
}
@@ -1194,8 +1194,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 9, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1208,8 +1208,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
ret <4 x i32> %2
}
@@ -1222,8 +1222,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 1, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1236,8 +1236,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1250,8 +1250,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1264,8 +1264,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1278,8 +1278,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1292,8 +1292,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1306,8 +1306,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1320,8 +1320,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1334,8 +1334,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1348,8 +1348,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1362,8 +1362,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 6, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 6, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1376,8 +1376,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1390,8 +1390,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 5, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 5, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1404,8 +1404,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
- %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
ret <8 x i16> %2
}
@@ -1418,8 +1418,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 3, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 3, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1432,8 +1432,8 @@
; CHECK-NEXT: bx lr
entry:
%0 = zext i16 %p to i32
- %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
- %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
ret <4 x i32> %2
}
@@ -1472,5 +1472,5 @@
declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32)
-declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8>, i32, i32, i32, <16 x i1>, <8 x i16>)
-declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16>, i32, i32, i32, <8 x i1>, <4 x i32>)
+declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, i32, <4 x i1>, <4 x i32>)
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -239,7 +239,8 @@
// A family of classes wrapping up information about the vector types
// used by MVE.
-class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
+ ValueType pred, ValueType dblpred,
bits<2> size, string suffixletter, bit unsigned> {
// The LLVM ValueType representing the vector, so we can use it in
// ISel patterns.
@@ -262,6 +263,9 @@
// directly.
ValueType Pred = pred;
+ // Same as Pred but for DblVec rather than Vec.
+ ValueType DblPred = dblpred;
+
// The most common representation of the vector element size in MVE
// instruction encodings: a 2-bit value V representing an (8<<V)-bit
// vector element.
@@ -288,31 +292,31 @@
}
// Integer vector types that don't treat signed and unsigned differently.
-def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
-def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
-def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
-def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "i", ?>;
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "i", ?>;
// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
-def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
-def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
-def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
-def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
-def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
-def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
-def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
-def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "s", 0b0>;
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "s", 0b0>;
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "u", 0b1>;
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "u", 0b1>;
// FP vector types.
-def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
-def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
-def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, v4i1, 0b01, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v4i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>;
// Polynomial vector types.
-def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
-def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>;
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
// --------- Start of base classes for the instructions themselves
@@ -2614,17 +2618,17 @@
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
@@ -4282,10 +4286,10 @@
// Predicated multiply
def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
(VTI.Vec MQPR:$Qn)),
- uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+ uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
}
Index: clang/test/CodeGen/arm-mve-intrinsics/vmulltq.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vmulltq.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vmulltq.c
@@ -63,8 +63,8 @@
// CHECK-LABEL: @test_vmulltq_int_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmulltq_int_m_s8(int16x8_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
@@ -79,8 +79,8 @@
// CHECK-LABEL: @test_vmulltq_int_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmulltq_int_m_u16(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
@@ -111,8 +111,8 @@
// CHECK-LABEL: @test_vmulltq_poly_m_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulltq_poly_m_p8(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
@@ -127,8 +127,8 @@
// CHECK-LABEL: @test_vmulltq_int_x_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulltq_int_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
@@ -143,8 +143,8 @@
// CHECK-LABEL: @test_vmulltq_int_x_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vmulltq_int_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
@@ -175,8 +175,8 @@
// CHECK-LABEL: @test_vmulltq_poly_x_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmulltq_poly_x_p8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
Index: clang/test/CodeGen/arm-mve-intrinsics/vmullbq.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vmullbq.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vmullbq.c
@@ -63,8 +63,8 @@
// CHECK-LABEL: @test_vmullbq_int_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vmullbq_int_m_s8(int16x8_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
@@ -79,8 +79,8 @@
// CHECK-LABEL: @test_vmullbq_int_m_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmullbq_int_m_u16(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
@@ -111,8 +111,8 @@
// CHECK-LABEL: @test_vmullbq_poly_m_p8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.poly.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmullbq_poly_m_p8(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
@@ -127,8 +127,8 @@
// CHECK-LABEL: @test_vmullbq_int_x_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mull.int.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vmullbq_int_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
@@ -143,8 +143,8 @@
// CHECK-LABEL: @test_vmullbq_int_x_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.int.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vmullbq_int_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
@@ -175,8 +175,8 @@
// CHECK-LABEL: @test_vmullbq_poly_x_p16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mull.poly.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vmullbq_poly_x_p16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
Index: clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@@ -1382,8 +1382,8 @@
// CHECK-LABEL: @test_vshllbq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 6, i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vshllbq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
@@ -1398,8 +1398,8 @@
// CHECK-LABEL: @test_vshllbq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vshllbq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
@@ -1414,8 +1414,8 @@
// CHECK-LABEL: @test_vshllbq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 3, i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vshllbq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
@@ -1430,8 +1430,8 @@
// CHECK-LABEL: @test_vshllbq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vshllbq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
@@ -1446,8 +1446,8 @@
// CHECK-LABEL: @test_vshlltq_m_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 4, i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vshlltq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
@@ -1462,8 +1462,8 @@
// CHECK-LABEL: @test_vshlltq_m_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 12, i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vshlltq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
@@ -1478,8 +1478,8 @@
// CHECK-LABEL: @test_vshlltq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 2, i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vshlltq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
@@ -1494,8 +1494,8 @@
// CHECK-LABEL: @test_vshlltq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 9, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 9, i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vshlltq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
@@ -1510,8 +1510,8 @@
// CHECK-LABEL: @test_vshllbq_x_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vshllbq_x_n_s8(int8x16_t a, mve_pred16_t p)
@@ -1526,8 +1526,8 @@
// CHECK-LABEL: @test_vshllbq_x_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vshllbq_x_n_s16(int16x8_t a, mve_pred16_t p)
@@ -1542,8 +1542,8 @@
// CHECK-LABEL: @test_vshllbq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 6, i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vshllbq_x_n_u8(uint8x16_t a, mve_pred16_t p)
@@ -1558,8 +1558,8 @@
// CHECK-LABEL: @test_vshllbq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 10, i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vshllbq_x_n_u16(uint16x8_t a, mve_pred16_t p)
@@ -1574,8 +1574,8 @@
// CHECK-LABEL: @test_vshlltq_x_n_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
int16x8_t test_vshlltq_x_n_s8(int8x16_t a, mve_pred16_t p)
@@ -1590,8 +1590,8 @@
// CHECK-LABEL: @test_vshlltq_x_n_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 6, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 6, i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
int32x4_t test_vshlltq_x_n_s16(int16x8_t a, mve_pred16_t p)
@@ -1606,8 +1606,8 @@
// CHECK-LABEL: @test_vshlltq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
//
uint16x8_t test_vshlltq_x_n_u8(uint8x16_t a, mve_pred16_t p)
@@ -1622,8 +1622,8 @@
// CHECK-LABEL: @test_vshlltq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
-// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 3, i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
//
uint32x4_t test_vshlltq_x_n_u16(uint16x8_t a, mve_pred16_t p)
Index: clang/include/clang/Basic/arm_mve_defs.td
===================================================================
--- clang/include/clang/Basic/arm_mve_defs.td
+++ clang/include/clang/Basic/arm_mve_defs.td
@@ -318,9 +318,11 @@
def SVector: VecOf<SScalar>;
// DblVector expands to a vector of scalars of size twice the size of Scalar.
+// DblPredicate expands to a predicate corresponding to DblVector
// HalfVector, similarly, expands to a vector of half-sized scalars. And
// UHalfVector is a vector of half-sized _unsigned integers_.
def DblVector: VecOf<DoubleSize<Scalar>>;
+def DblPredicate: PredOf<DoubleSize<Scalar>>;
def HalfVector: VecOf<HalfSize<Scalar>>;
def UHalfVector: VecOf<Unsigned<HalfSize<Scalar>>>;
Index: clang/include/clang/Basic/arm_mve.td
===================================================================
--- clang/include/clang/Basic/arm_mve.td
+++ clang/include/clang/Basic/arm_mve.td
@@ -332,8 +332,8 @@
multiclass DblVectorVectorArithmetic<string operation, dag extraArgs = (?)> {
defm "" : IntrinsicMX<
- DblVector, (args Vector:$a, Vector:$b, Predicate:$pred),
- !con((IRInt<operation, [DblVector, Vector, Predicate]> $a, $b),
+ DblVector, (args Vector:$a, Vector:$b, DblPredicate:$pred),
+ !con((IRInt<operation, [DblVector, Vector, DblPredicate]> $a, $b),
extraArgs, (? $pred, $inactive))>;
}
@@ -881,8 +881,8 @@
(IRInt<"vshll_imm", [DblVector, Vector]>
$v, $sh, (unsignedflag Scalar), top)>;
defm "": IntrinsicMX<DblVector, (args Vector:$v, imm_1toN:$sh,
- Predicate:$pred),
- (IRInt<"vshll_imm_predicated", [DblVector, Vector, Predicate]>
+ DblPredicate:$pred),
+ (IRInt<"vshll_imm_predicated", [DblVector, Vector, DblPredicate]>
$v, $sh, (unsignedflag Scalar), top, $pred, $inactive), 1, "_n">;
}
}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits