Conanap updated this revision to Diff 343115.
Conanap marked 2 inline comments as done.
Conanap added a comment.
Replaced `COPY_TO_REGCLASS` with `SUBREG_TO_REG` and added a test case.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100782/new/
https://reviews.llvm.org/D100782
Files:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
Index: llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
+++ llvm/test/CodeGen/PowerPC/vec_insert_elt.ll
@@ -200,21 +200,19 @@
; CHECK-LABEL: testFloat1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn vs0, f1
-; CHECK-NEXT: extsw r3, r6
-; CHECK-NEXT: slwi r3, r3, 2
-; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
-; CHECK-NEXT: mffprwz r4, f0
-; CHECK-NEXT: vinswrx v2, r3, r4
+; CHECK-NEXT: extsw r4, r6
+; CHECK-NEXT: slwi r4, r4, 2
+; CHECK-NEXT: mffprwz r3, f0
+; CHECK-NEXT: vinswrx v2, r4, r3
; CHECK-NEXT: blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xscvdpspn vs0, f1
-; CHECK-BE-NEXT: extsw r3, r6
-; CHECK-BE-NEXT: slwi r3, r3, 2
-; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
-; CHECK-BE-NEXT: mffprwz r4, f0
-; CHECK-BE-NEXT: vinswlx v2, r3, r4
+; CHECK-BE-NEXT: extsw r4, r6
+; CHECK-BE-NEXT: slwi r4, r4, 2
+; CHECK-BE-NEXT: mffprwz r3, f0
+; CHECK-BE-NEXT: vinswlx v2, r4, r3
; CHECK-BE-NEXT: blr
;
; CHECK-P9-LABEL: testFloat1:
@@ -346,7 +344,6 @@
; CHECK-LABEL: testFloatImm1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xscvdpspn vs0, f1
-; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-NEXT: xxinsertw v2, vs0, 12
; CHECK-NEXT: xxinsertw v2, vs0, 4
; CHECK-NEXT: blr
@@ -354,7 +351,6 @@
; CHECK-BE-LABEL: testFloatImm1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xscvdpspn vs0, f1
-; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-BE-NEXT: xxinsertw v2, vs0, 0
; CHECK-BE-NEXT: xxinsertw v2, vs0, 8
; CHECK-BE-NEXT: blr
@@ -362,7 +358,6 @@
; CHECK-P9-LABEL: testFloatImm1:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: xscvdpspn vs0, f1
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
; CHECK-P9-NEXT: blr
@@ -393,11 +388,9 @@
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: lfs f0, 0(r5)
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-P9-NEXT: lfs f0, 4(r5)
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
; CHECK-P9-NEXT: blr
entry:
@@ -439,11 +432,9 @@
; CHECK-P9-NEXT: li r3, 1
; CHECK-P9-NEXT: rldic r3, r3, 38, 25
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-P9-NEXT: xxinsertw v2, vs0, 0
; CHECK-P9-NEXT: lfsx f0, r5, r3
; CHECK-P9-NEXT: xscvdpspn vs0, f0
-; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 3
; CHECK-P9-NEXT: xxinsertw v2, vs0, 8
; CHECK-P9-NEXT: blr
entry:
@@ -738,3 +729,28 @@
ret <2 x double> %vecins
}
+define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testInsertDoubleToFloat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xscvdpsp f0, f1
+; CHECK-NEXT: xxinsertw v2, vs0, 8
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: testInsertDoubleToFloat:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xsrsp f0, f1
+; CHECK-BE-NEXT: xscvdpspn vs0, f0
+; CHECK-BE-NEXT: xxinsertw v2, vs0, 4
+; CHECK-BE-NEXT: blr
+;
+; CHECK-P9-LABEL: testInsertDoubleToFloat:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: xsrsp f0, f1
+; CHECK-P9-NEXT: xscvdpspn vs0, f0
+; CHECK-P9-NEXT: xxinsertw v2, vs0, 4
+; CHECK-P9-NEXT: blr
+entry:
+ %conv = fptrunc double %b to float
+ %vecins = insertelement <4 x float> %a, float %conv, i32 1
+ ret <4 x float> %vecins
+}
Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
+++ llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll
@@ -216,7 +216,6 @@
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfs f0, 0(r3)
; P9LE-NEXT: xscvdpspn vs0, f0
-; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT: xxinsertw v2, vs0, 12
; P9LE-NEXT: blr
;
@@ -224,7 +223,6 @@
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfs f0, 0(r3)
; P9BE-NEXT: xscvdpspn vs0, f0
-; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9BE-NEXT: xxinsertw v2, vs0, 0
; P9BE-NEXT: blr
;
Index: llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -506,11 +506,9 @@
entry:
; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = insertelement <4 x float> %a, float %b, i32 0
ret <4 x float> %vecins
@@ -520,11 +518,9 @@
entry:
; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_
; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = insertelement <4 x float> %a, float %b, i32 1
ret <4 x float> %vecins
@@ -534,11 +530,9 @@
entry:
; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_
; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = insertelement <4 x float> %a, float %b, i32 2
ret <4 x float> %vecins
@@ -548,11 +542,9 @@
entry:
; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
; CHECK: xscvdpspn 0, 1
-; CHECK: xxsldwi 0, 0, 0, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_
; CHECK-BE: xscvdpspn 0, 1
-; CHECK-BE: xxsldwi 0, 0, 0, 3
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = insertelement <4 x float> %a, float %b, i32 3
ret <4 x float> %vecins
Index: llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
+++ llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll
@@ -10,8 +10,8 @@
; CHECK-P7: stfs 1,
; CHECK-P7: lwa 3,
; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
-; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
-; CHECK: mffprwz 3, [[SHIFTREG]]
+; CHECK-NOT: xxsldwi
+; CHECK: mffprwz 3, [[CONVREG]]
}
define i64 @f64toi64(double %a) {
@@ -50,8 +50,8 @@
; CHECK-P7: stfs 1,
; CHECK-P7: lwz 3,
; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
-; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
-; CHECK: mffprwz 3, [[SHIFTREG]]
+; CHECK-NOT: xxsldwi
+; CHECK: mffprwz 3, [[CONVREG]]
}
define i64 @f64toi64u(double %a) {
Index: llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -203,18 +203,18 @@
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lwz 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stwx 6, 7, 4
-; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: lwz 3, 1(3)
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stwx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lwz 6, 0(3)
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stwx 6, 7, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: lwz 3, 1(3)
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat2:
@@ -246,21 +246,21 @@
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: lwzx 6, 3, 6
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stwx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: rldic 4, 4, 36, 27
-; CHECK-64-DAG: lwzx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stwx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lis 6, 1
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: lwzx 6, 3, 6
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stwx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-NEXT: lwzx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat3:
@@ -297,7 +297,6 @@
; CHECK-64-LABEL: testFloatImm1:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: xscvdpspn 0, 1
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 0
; CHECK-64-NEXT: xxinsertw 34, 0, 8
; CHECK-64-NEXT: blr
@@ -305,7 +304,6 @@
; CHECK-32-LABEL: testFloatImm1:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: xscvdpspn 0, 1
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
@@ -320,11 +318,9 @@
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: lfs 0, 0(3)
; CHECK-64-NEXT: xscvdpspn 0, 0
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 0
; CHECK-64-NEXT: lfs 0, 4(3)
; CHECK-64-NEXT: xscvdpspn 0, 0
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 8
; CHECK-64-NEXT: blr
;
@@ -332,11 +328,9 @@
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: lfs 0, 0(3)
; CHECK-32-NEXT: xscvdpspn 0, 0
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: lfs 0, 4(3)
; CHECK-32-NEXT: xscvdpspn 0, 0
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
entry:
@@ -358,11 +352,9 @@
; CHECK-64-NEXT: li 4, 1
; CHECK-64-NEXT: rldic 4, 4, 38, 25
; CHECK-64-NEXT: xscvdpspn 0, 0
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 0
; CHECK-64-NEXT: lfsx 0, 3, 4
; CHECK-64-NEXT: xscvdpspn 0, 0
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 8
; CHECK-64-NEXT: blr
;
@@ -371,11 +363,9 @@
; CHECK-32-NEXT: lis 4, 4
; CHECK-32-NEXT: lfsx 0, 3, 4
; CHECK-32-NEXT: xscvdpspn 0, 0
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: lfs 0, 0(3)
; CHECK-32-NEXT: xscvdpspn 0, 0
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
entry:
@@ -418,19 +408,19 @@
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: ld 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stdx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: ldx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stdx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: ld 6, 0(3)
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stdx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: ldx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble2:
@@ -462,21 +452,21 @@
define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: ldx 6, 3, 6
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stdx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: rldic 4, 4, 36, 27
-; CHECK-64-DAG: ldx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stdx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lis 6, 1
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: ldx 6, 3, 6
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stdx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-NEXT: ldx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble3:
Index: llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -743,14 +743,12 @@
; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: xscvdpspn 0, 1
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 0
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: xscvdpspn 0, 1
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: blr
entry:
@@ -762,14 +760,12 @@
; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: xscvdpspn 0, 1
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 4
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: xscvdpspn 0, 1
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 4
; CHECK-32-NEXT: blr
entry:
@@ -781,14 +777,12 @@
; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: xscvdpspn 0, 1
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 8
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: xscvdpspn 0, 1
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
entry:
@@ -800,14 +794,12 @@
; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: xscvdpspn 0, 1
-; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-64-NEXT: xxinsertw 34, 0, 12
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: xscvdpspn 0, 1
-; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 12
; CHECK-32-NEXT: blr
entry:
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1816,8 +1816,7 @@
// Output dag used to bitcast f32 to i32 and f64 to i64
def Bitcast {
- dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI (XSCVDPSPN $A),
- (XSCVDPSPN $A), 3), sub_64)));
+ dag FltToInt = (i32 (MFVSRWZ (EXTRACT_SUBREG (XSCVDPSPN $A), sub_64)));
dag DblToLong = (i64 (MFVSRD $A));
}
@@ -2212,7 +2211,7 @@
}
def AlignValues {
- dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+ dag F32_TO_BE_WORD1 = (v4f32 (XSCVDPSPN $B));
dag I32_TO_BE_WORD1 = (SUBREG_TO_REG (i64 1), (MTVSRWZ $B), sub_64);
}
@@ -2796,6 +2795,9 @@
v4i32, FltToUIntLoad.A,
(XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
(SUBREG_TO_REG (i64 1), (XSCVDPUXWSs (XFLOADf32 ForceXForm:$A)), sub_64)>;
+def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
+ (f32 (fpround f64:$A)), (f32 (fpround f64:$A)))),
+ (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$A), sub_64), 0))>;
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
@@ -4271,12 +4273,24 @@
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 0)),
+ (v4f32 (XXINSERTW v4f32:$A,
+ (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 1)),
+ (v4f32 (XXINSERTW v4f32:$A,
+ (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 8))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 2)),
+ (v4f32 (XXINSERTW v4f32:$A,
+ (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 4))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, (f32 (fpround f64:$B)), 3)),
+ (v4f32 (XXINSERTW v4f32:$A,
+ (SUBREG_TO_REG (i64 1), (XSCVDPSP f64:$B), sub_64), 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits