This revision was automatically updated to reflect the committed changes.
Closed by commit rG88ff4d2ca1a0: [PowerPC] Fix STRICT_FRINT/STRICT_FNEARBYINT lowering (authored by qiucf).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
Repository: rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D87220/new/
https://reviews.llvm.org/D87220
Files:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-ppc-fpconstrained.c
clang/test/CodeGen/builtins-ppc-vsx.c
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/fp-strict-round.ll
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Index: llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -4899,19 +4899,50 @@
define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: mflr 0
+; PC64LE-NEXT: std 0, 16(1)
+; PC64LE-NEXT: stdu 1, -64(1)
; PC64LE-NEXT: addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI81_0@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpic 34, 0
+; PC64LE-NEXT: lfd 1, .LCPI81_0@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: addis 3, 2, .LCPI81_1@toc@ha
+; PC64LE-NEXT: lfs 1, .LCPI81_1@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxmrghd 34, 1, 0
+; PC64LE-NEXT: addi 1, 1, 64
+; PC64LE-NEXT: ld 0, 16(1)
+; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: mflr 0
+; PC64LE9-NEXT: std 0, 16(1)
+; PC64LE9-NEXT: stdu 1, -48(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI81_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpic 34, 0
+; PC64LE9-NEXT: lfd 1, .LCPI81_0@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addis 3, 2, .LCPI81_1@toc@ha
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfs 1, .LCPI81_1@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: xxmrghd 34, 1, 0
+; PC64LE9-NEXT: addi 1, 1, 48
+; PC64LE9-NEXT: ld 0, 16(1)
+; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
@@ -5010,31 +5041,72 @@
define <3 x double> @constrained_vector_nearby_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_nearby_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI83_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: mflr 0
+; PC64LE-NEXT: std 0, 16(1)
+; PC64LE-NEXT: stdu 1, -80(1)
+; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI83_0@toc@ha
; PC64LE-NEXT: lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpic 3, 1
-; PC64LE-NEXT: xvrdpic 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE-NEXT: lfs 1, .LCPI83_1@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: addis 3, 2, .LCPI83_2@toc@ha
+; PC64LE-NEXT: xxmrghd 63, 0, 1
+; PC64LE-NEXT: lfd 1, .LCPI83_2@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: xxswapd 0, 63
+; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: xxlor 2, 63, 63
+; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: fmr 3, 1
+; PC64LE-NEXT: fmr 1, 0
+; PC64LE-NEXT: addi 1, 1, 80
+; PC64LE-NEXT: ld 0, 16(1)
+; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: mflr 0
+; PC64LE9-NEXT: std 0, 16(1)
+; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE9-NEXT: lfd 0, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI83_1@toc@l
-; PC64LE9-NEXT: xsrdpic 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpic 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfs 1, .LCPI83_1@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: addis 3, 2, .LCPI83_2@toc@ha
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: xxmrghd 63, 0, 1
+; PC64LE9-NEXT: lfd 1, .LCPI83_2@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: fmr 3, 1
+; PC64LE9-NEXT: xxswapd 1, 63
+; PC64LE9-NEXT: xscpsgndp 2, 63, 63
+; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: addi 1, 1, 64
+; PC64LE9-NEXT: ld 0, 16(1)
+; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
@@ -5047,28 +5119,86 @@
define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
; PC64LE: # %bb.0: # %entry
+; PC64LE-NEXT: mflr 0
+; PC64LE-NEXT: std 0, 16(1)
+; PC64LE-NEXT: stdu 1, -80(1)
+; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PC64LE-NEXT: addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI84_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI84_0@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: addi 3, 4, .LCPI84_1@toc@l
-; PC64LE-NEXT: lxvd2x 1, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xxswapd 1, 1
-; PC64LE-NEXT: xvrdpic 35, 0
-; PC64LE-NEXT: xvrdpic 34, 1
+; PC64LE-NEXT: lfd 1, .LCPI84_0@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: addis 3, 2, .LCPI84_1@toc@ha
+; PC64LE-NEXT: lfd 1, .LCPI84_1@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: addis 3, 2, .LCPI84_2@toc@ha
+; PC64LE-NEXT: xxmrghd 63, 1, 0
+; PC64LE-NEXT: lfd 1, .LCPI84_2@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT: addis 3, 2, .LCPI84_3@toc@ha
+; PC64LE-NEXT: lfd 1, .LCPI84_3@toc@l(3)
+; PC64LE-NEXT: bl nearbyint
+; PC64LE-NEXT: nop
+; PC64LE-NEXT: li 3, 48
+; PC64LE-NEXT: vmr 2, 31
+; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: li 3, 64
+; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT: xxmrghd 35, 1, 0
+; PC64LE-NEXT: addi 1, 1, 80
+; PC64LE-NEXT: ld 0, 16(1)
+; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
; PC64LE9: # %bb.0: # %entry
+; PC64LE9-NEXT: mflr 0
+; PC64LE9-NEXT: std 0, 16(1)
+; PC64LE9-NEXT: stdu 1, -64(1)
; PC64LE9-NEXT: addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI84_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
+; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfd 1, .LCPI84_0@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
; PC64LE9-NEXT: addis 3, 2, .LCPI84_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI84_1@toc@l
-; PC64LE9-NEXT: xvrdpic 35, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpic 34, 0
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfd 1, .LCPI84_1@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: addis 3, 2, .LCPI84_2@toc@ha
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: xxmrghd 63, 1, 0
+; PC64LE9-NEXT: lfd 1, .LCPI84_2@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: addis 3, 2, .LCPI84_3@toc@ha
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT: lfd 1, .LCPI84_3@toc@l(3)
+; PC64LE9-NEXT: bl nearbyint
+; PC64LE9-NEXT: nop
+; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: vmr 2, 31
+; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT: xxmrghd 35, 1, 0
+; PC64LE9-NEXT: addi 1, 1, 64
+; PC64LE9-NEXT: ld 0, 16(1)
+; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
entry:
%nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
Index: llvm/test/CodeGen/PowerPC/fp-strict-round.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -170,12 +170,30 @@
define double @nearbyint_f64(double %f1, double %f2) {
; P8-LABEL: nearbyint_f64:
; P8: # %bb.0:
-; P8-NEXT: xsrdpic f1, f1
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: bl nearbyint
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_f64:
; P9: # %bb.0:
-; P9-NEXT: xsrdpic f1, f1
+; P9-NEXT: mflr r0
+; P9-NEXT: std r0, 16(r1)
+; P9-NEXT: stdu r1, -32(r1)
+; P9-NEXT: .cfi_def_cfa_offset 32
+; P9-NEXT: .cfi_offset lr, 16
+; P9-NEXT: bl nearbyint
+; P9-NEXT: nop
+; P9-NEXT: addi r1, r1, 32
+; P9-NEXT: ld r0, 16(r1)
+; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f1,
@@ -187,12 +205,104 @@
define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) {
; P8-LABEL: nearbyint_v4f32:
; P8: # %bb.0:
-; P8-NEXT: xvrspic v2, v2
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -176(r1)
+; P8-NEXT: .cfi_def_cfa_offset 176
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: .cfi_offset v30, -32
+; P8-NEXT: .cfi_offset v31, -16
+; P8-NEXT: xxsldwi vs0, v2, v2, 3
+; P8-NEXT: li r3, 144
+; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: li r3, 160
+; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: vmr v31, v2
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: bl nearbyintf
+; P8-NEXT: nop
+; P8-NEXT: xxsldwi vs0, v31, v31, 1
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: li r3, 128
+; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: bl nearbyintf
+; P8-NEXT: nop
+; P8-NEXT: li r3, 128
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: xxmrghd vs0, vs1, vs0
+; P8-NEXT: xscvspdpn f1, v31
+; P8-NEXT: xvcvdpsp v30, vs0
+; P8-NEXT: bl nearbyintf
+; P8-NEXT: nop
+; P8-NEXT: xxswapd vs0, v31
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: li r3, 128
+; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: xscvspdpn f1, vs0
+; P8-NEXT: bl nearbyintf
+; P8-NEXT: nop
+; P8-NEXT: li r3, 128
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 160
+; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 144
+; P8-NEXT: xxmrghd vs0, vs0, vs1
+; P8-NEXT: xvcvdpsp v2, vs0
+; P8-NEXT: vmrgew v2, v2, v30
+; P8-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: addi r1, r1, 176
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_v4f32:
; P9: # %bb.0:
-; P9-NEXT: xvrspic v2, v2
+; P9-NEXT: mflr r0
+; P9-NEXT: std r0, 16(r1)
+; P9-NEXT: stdu r1, -80(r1)
+; P9-NEXT: .cfi_def_cfa_offset 80
+; P9-NEXT: .cfi_offset lr, 16
+; P9-NEXT: .cfi_offset v30, -32
+; P9-NEXT: .cfi_offset v31, -16
+; P9-NEXT: xxsldwi vs0, v2, v2, 3
+; P9-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
+; P9-NEXT: xscvspdpn f1, vs0
+; P9-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
+; P9-NEXT: vmr v31, v2
+; P9-NEXT: bl nearbyintf
+; P9-NEXT: nop
+; P9-NEXT: xxsldwi vs0, v31, v31, 1
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT: xscvspdpn f1, vs0
+; P9-NEXT: bl nearbyintf
+; P9-NEXT: nop
+; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: xxmrghd vs0, vs1, vs0
+; P9-NEXT: xscvspdpn f1, v31
+; P9-NEXT: xvcvdpsp v30, vs0
+; P9-NEXT: bl nearbyintf
+; P9-NEXT: nop
+; P9-NEXT: xxswapd vs0, v31
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT: xscvspdpn f1, vs0
+; P9-NEXT: bl nearbyintf
+; P9-NEXT: nop
+; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
+; P9-NEXT: xxmrghd vs0, vs0, vs1
+; P9-NEXT: xvcvdpsp v2, vs0
+; P9-NEXT: vmrgew v2, v2, v30
+; P9-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
+; P9-NEXT: addi r1, r1, 80
+; P9-NEXT: ld r0, 16(r1)
+; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
<4 x float> %vf1,
@@ -204,12 +314,62 @@
define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) {
; P8-LABEL: nearbyint_v2f64:
; P8: # %bb.0:
-; P8-NEXT: xvrdpic v2, v2
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -160(r1)
+; P8-NEXT: .cfi_def_cfa_offset 160
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: .cfi_offset v31, -16
+; P8-NEXT: li r3, 144
+; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: vmr v31, v2
+; P8-NEXT: xxlor f1, v31, v31
+; P8-NEXT: bl nearbyint
+; P8-NEXT: nop
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: li r3, 128
+; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill
+; P8-NEXT: xxswapd vs1, v31
+; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; P8-NEXT: bl nearbyint
+; P8-NEXT: nop
+; P8-NEXT: li r3, 128
+; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: li r3, 144
+; P8-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; P8-NEXT: xxmrghd v2, vs0, vs1
+; P8-NEXT: addi r1, r1, 160
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: nearbyint_v2f64:
; P9: # %bb.0:
-; P9-NEXT: xvrdpic v2, v2
+; P9-NEXT: mflr r0
+; P9-NEXT: std r0, 16(r1)
+; P9-NEXT: stdu r1, -64(r1)
+; P9-NEXT: .cfi_def_cfa_offset 64
+; P9-NEXT: .cfi_offset lr, 16
+; P9-NEXT: .cfi_offset v31, -16
+; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill
+; P9-NEXT: vmr v31, v2
+; P9-NEXT: xscpsgndp f1, v31, v31
+; P9-NEXT: bl nearbyint
+; P9-NEXT: nop
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: stxv vs1, 32(r1) # 16-byte Folded Spill
+; P9-NEXT: xxswapd vs1, v31
+; P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; P9-NEXT: bl nearbyint
+; P9-NEXT: nop
+; P9-NEXT: lxv vs0, 32(r1) # 16-byte Folded Reload
+; P9-NEXT: lxv v31, 48(r1) # 16-byte Folded Reload
+; P9-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; P9-NEXT: xxmrghd v2, vs0, vs1
+; P9-NEXT: addi r1, r1, 64
+; P9-NEXT: ld r0, 16(r1)
+; P9-NEXT: mtlr r0
; P9-NEXT: blr
%res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
<2 x double> %vf1,
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -890,15 +890,15 @@
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (any_fnearbyint f64:$XB))]>;
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (any_fnearbyint v2f64:$XB))]>;
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (any_fnearbyint v4f32:$XB))]>;
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
// Max/Min Instructions
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
@@ -2681,7 +2681,7 @@
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (any_fnearbyint f32:$S)),
+def : Pat<(f32 (fnearbyint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
@@ -2696,11 +2696,11 @@
def : Pat<(f32 (any_frint f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIC
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
// Rounding for double precision.
-def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -316,8 +316,10 @@
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
- if (Subtarget.hasVSX())
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
+ }
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
@@ -1059,7 +1061,7 @@
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
@@ -1073,7 +1075,7 @@
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
- setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
Index: clang/test/CodeGen/builtins-ppc-vsx.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-vsx.c
+++ clang/test/CodeGen/builtins-ppc-vsx.c
@@ -863,12 +863,12 @@
// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvredp(<2 x double>
res_vf = vec_rint(vf);
-// CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
-// CHECK-LE: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call <4 x float> @llvm.rint.v4f32(<4 x float> %{{[0-9]+}})
res_vd = vec_rint(vd);
-// CHECK: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
-// CHECK-LE: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call <2 x double> @llvm.rint.v2f64(<2 x double> %{{[0-9]+}})
res_vf = vec_rsqrte(vf);
// CHECK: call <4 x float> @llvm.ppc.vsx.xvrsqrtesp(<4 x float> %{{[0-9]+}})
Index: clang/test/CodeGen/builtins-ppc-fpconstrained.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-fpconstrained.c
+++ clang/test/CodeGen/builtins-ppc-fpconstrained.c
@@ -59,14 +59,14 @@
vf = __builtin_vsx_xvrspic(vf);
// CHECK-LABEL: try-xvrspic
- // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}})
- // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ // CHECK-UNCONSTRAINED: @llvm.rint.v4f32(<4 x float> %{{.*}})
+ // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: xvrspic
vd = __builtin_vsx_xvrdpic(vd);
// CHECK-LABEL: try-xvrdpic
- // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}})
- // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ // CHECK-UNCONSTRAINED: @llvm.rint.v2f64(<2 x double> %{{.*}})
+ // CHECK-CONSTRAINED: @llvm.experimental.constrained.rint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
// CHECK-ASM: xvrdpic
vf = __builtin_vsx_xvrspip(vf);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14273,8 +14273,8 @@
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
BuiltinID == PPC::BI__builtin_vsx_xvrspic)
ID = Builder.getIsFPConstrained()
- ? Intrinsic::experimental_constrained_nearbyint
- : Intrinsic::nearbyint;
+ ? Intrinsic::experimental_constrained_rint
+ : Intrinsic::rint;
else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
BuiltinID == PPC::BI__builtin_vsx_xvrspip)
ID = Builder.getIsFPConstrained()
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits