Author: Alex Bradbury Date: 2024-07-10T15:28:18+01:00 New Revision: f1f8d8789af2627fd04a0e98203189f6da14a261
URL: https://github.com/llvm/llvm-project/commit/f1f8d8789af2627fd04a0e98203189f6da14a261 DIFF: https://github.com/llvm/llvm-project/commit/f1f8d8789af2627fd04a0e98203189f6da14a261.diff LOG: Revert "[RISCV] Enable TTI::shouldDropLSRSolutionIfLessProfitable by default …" This reverts commit af47a4ec503fe3efc6ade8cad4882881a202ed41. Added: Modified: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll llvm/test/CodeGen/RISCV/rvv/pr95865.ll llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 7b239b8fc17a3..9c37a4f6ec2d0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -398,8 +398,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> { return true; } - bool shouldDropLSRSolutionIfLessProfitable() const { return true; } - std::optional<unsigned> getMinPageSize() const { return 4096; } }; diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll index 92639be0017e8..2b4b8e979f3d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll @@ -86,29 +86,30 @@ declare i64 @llvm.vscale.i64() define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; NO-SINK-LABEL: sink_splat_add_scalable: ; NO-SINK: # %bb.0: # %entry -; NO-SINK-NEXT: csrr a2, vlenb -; NO-SINK-NEXT: srli a2, a2, 1 +; NO-SINK-NEXT: csrr a5, vlenb +; NO-SINK-NEXT: srli a2, a5, 1 ; NO-SINK-NEXT: li a3, 1024 ; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2 ; NO-SINK-NEXT: # %bb.1: ; NO-SINK-NEXT: li a3, 0 ; NO-SINK-NEXT: j .LBB1_5 ; NO-SINK-NEXT: .LBB1_2: # %vector.ph -; NO-SINK-NEXT: li a5, 0 ; NO-SINK-NEXT: addi a3, a2, -1 ; NO-SINK-NEXT: andi a4, a3, 1024 ; NO-SINK-NEXT: xori a3, a4, 1024 ; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma ; NO-SINK-NEXT: vmv.v.x v8, a1 +; NO-SINK-NEXT: slli a5, a5, 1 +; NO-SINK-NEXT: mv a6, a0 +; NO-SINK-NEXT: mv a7, a3 ; NO-SINK-NEXT: .LBB1_3: # %vector.body ; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1 -; NO-SINK-NEXT: slli a6, a5, 2 -; NO-SINK-NEXT: add a6, a0, a6 ; NO-SINK-NEXT: vl2re32.v v10, (a6) ; NO-SINK-NEXT: vadd.vv v10, v10, v8 -; NO-SINK-NEXT: add a5, a5, a2 ; NO-SINK-NEXT: vs2r.v v10, (a6) -; NO-SINK-NEXT: bne a5, a3, .LBB1_3 +; NO-SINK-NEXT: sub a7, a7, a2 +; NO-SINK-NEXT: add a6, a6, a5 +; NO-SINK-NEXT: bnez a7, .LBB1_3 ; NO-SINK-NEXT: # %bb.4: # %middle.block ; NO-SINK-NEXT: beqz a4, .LBB1_7 ; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader @@ -128,28 +129,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; ; SINK-LABEL: sink_splat_add_scalable: ; SINK: # %bb.0: # %entry -; SINK-NEXT: csrr a2, vlenb -; SINK-NEXT: srli a2, a2, 1 +; SINK-NEXT: csrr a5, vlenb +; SINK-NEXT: srli a2, a5, 1 ; SINK-NEXT: li a3, 1024 ; SINK-NEXT: bgeu a3, a2, .LBB1_2 ; SINK-NEXT: # %bb.1: ; SINK-NEXT: li a3, 0 ; SINK-NEXT: j .LBB1_5 ; SINK-NEXT: .LBB1_2: # %vector.ph -; SINK-NEXT: li a5, 0 ; SINK-NEXT: addi a3, a2, -1 ; SINK-NEXT: andi a4, a3, 1024 ; SINK-NEXT: xori a3, a4, 1024 -; SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; SINK-NEXT: slli a5, a5, 1 +; SINK-NEXT: mv a6, a0 +; SINK-NEXT: mv a7, a3 +; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; SINK-NEXT: .LBB1_3: # %vector.body ; SINK-NEXT: # =>This Inner Loop Header: Depth=1 -; SINK-NEXT: slli a6, a5, 2 -; SINK-NEXT: add a6, a0, a6 ; SINK-NEXT: vl2re32.v v8, (a6) ; SINK-NEXT: vadd.vx v8, v8, a1 -; SINK-NEXT: add a5, a5, a2 ; SINK-NEXT: vs2r.v v8, (a6) -; SINK-NEXT: bne a5, a3, .LBB1_3 +; SINK-NEXT: sub a7, a7, a2 +; SINK-NEXT: add a6, a6, a5 +; SINK-NEXT: bnez a7, .LBB1_3 ; SINK-NEXT: # %bb.4: # %middle.block ; SINK-NEXT: beqz a4, .LBB1_7 ; SINK-NEXT: .LBB1_5: # %for.body.preheader @@ -169,28 +171,29 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; ; DEFAULT-LABEL: sink_splat_add_scalable: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: csrr a2, vlenb -; DEFAULT-NEXT: srli a2, a2, 1 +; DEFAULT-NEXT: csrr a5, vlenb +; DEFAULT-NEXT: srli a2, a5, 1 ; DEFAULT-NEXT: li a3, 1024 ; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2 ; DEFAULT-NEXT: # %bb.1: ; DEFAULT-NEXT: li a3, 0 ; DEFAULT-NEXT: j .LBB1_5 ; DEFAULT-NEXT: .LBB1_2: # %vector.ph -; DEFAULT-NEXT: li a5, 0 ; DEFAULT-NEXT: addi a3, a2, -1 ; DEFAULT-NEXT: andi a4, a3, 1024 ; DEFAULT-NEXT: xori a3, a4, 1024 -; DEFAULT-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; DEFAULT-NEXT: slli a5, a5, 1 +; DEFAULT-NEXT: mv a6, a0 +; DEFAULT-NEXT: mv a7, a3 +; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; DEFAULT-NEXT: .LBB1_3: # %vector.body ; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1 -; DEFAULT-NEXT: slli a6, a5, 2 -; DEFAULT-NEXT: add a6, a0, a6 ; DEFAULT-NEXT: vl2re32.v v8, (a6) ; DEFAULT-NEXT: vadd.vx v8, v8, a1 -; DEFAULT-NEXT: add a5, a5, a2 ; DEFAULT-NEXT: vs2r.v v8, (a6) -; DEFAULT-NEXT: bne a5, a3, .LBB1_3 +; DEFAULT-NEXT: sub a7, a7, a2 +; DEFAULT-NEXT: add a6, a6, a5 +; DEFAULT-NEXT: bnez a7, .LBB1_3 ; DEFAULT-NEXT: # %bb.4: # %middle.block ; DEFAULT-NEXT: beqz a4, .LBB1_7 ; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader @@ -404,32 +407,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; NO-SINK-LABEL: sink_splat_fadd_scalable: ; NO-SINK: # %bb.0: # %entry ; NO-SINK-NEXT: csrr a1, vlenb -; NO-SINK-NEXT: srli a1, a1, 2 -; NO-SINK-NEXT: li a2, 1024 -; NO-SINK-NEXT: bgeu a2, a1, .LBB4_2 +; NO-SINK-NEXT: srli a2, a1, 2 +; NO-SINK-NEXT: li a3, 1024 +; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2 ; NO-SINK-NEXT: # %bb.1: -; NO-SINK-NEXT: li a2, 0 +; NO-SINK-NEXT: li a3, 0 ; NO-SINK-NEXT: j .LBB4_5 ; NO-SINK-NEXT: .LBB4_2: # %vector.ph -; NO-SINK-NEXT: li a4, 0 -; NO-SINK-NEXT: addi a2, a1, -1 -; NO-SINK-NEXT: andi a3, a2, 1024 -; NO-SINK-NEXT: xori a2, a3, 1024 +; NO-SINK-NEXT: addi a3, a2, -1 +; NO-SINK-NEXT: andi a4, a3, 1024 +; NO-SINK-NEXT: xori a3, a4, 1024 ; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma ; NO-SINK-NEXT: vfmv.v.f v8, fa0 +; NO-SINK-NEXT: mv a5, a0 +; NO-SINK-NEXT: mv a6, a3 ; NO-SINK-NEXT: .LBB4_3: # %vector.body ; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1 -; NO-SINK-NEXT: slli a5, a4, 2 -; NO-SINK-NEXT: add a5, a0, a5 ; NO-SINK-NEXT: vl1re32.v v9, (a5) ; NO-SINK-NEXT: vfadd.vv v9, v9, v8 -; NO-SINK-NEXT: add a4, a4, a1 ; NO-SINK-NEXT: vs1r.v v9, (a5) -; NO-SINK-NEXT: bne a4, a2, .LBB4_3 +; NO-SINK-NEXT: sub a6, a6, a2 +; NO-SINK-NEXT: add a5, a5, a1 +; NO-SINK-NEXT: bnez a6, .LBB4_3 ; NO-SINK-NEXT: # %bb.4: # %middle.block -; NO-SINK-NEXT: beqz a3, .LBB4_7 +; NO-SINK-NEXT: beqz a4, .LBB4_7 ; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader -; NO-SINK-NEXT: slli a1, a2, 2 +; NO-SINK-NEXT: slli a1, a3, 2 ; NO-SINK-NEXT: add a1, a0, a1 ; NO-SINK-NEXT: lui a2, 1 ; NO-SINK-NEXT: add a0, a0, a2 @@ -446,31 +449,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; SINK-LABEL: sink_splat_fadd_scalable: ; SINK: # %bb.0: # %entry ; SINK-NEXT: csrr a1, vlenb -; SINK-NEXT: srli a1, a1, 2 -; SINK-NEXT: li a2, 1024 -; SINK-NEXT: bgeu a2, a1, .LBB4_2 +; SINK-NEXT: srli a2, a1, 2 +; SINK-NEXT: li a3, 1024 +; SINK-NEXT: bgeu a3, a2, .LBB4_2 ; SINK-NEXT: # %bb.1: -; SINK-NEXT: li a2, 0 +; SINK-NEXT: li a3, 0 ; SINK-NEXT: j .LBB4_5 ; SINK-NEXT: .LBB4_2: # %vector.ph -; SINK-NEXT: li a4, 0 -; SINK-NEXT: addi a2, a1, -1 -; SINK-NEXT: andi a3, a2, 1024 -; SINK-NEXT: xori a2, a3, 1024 -; SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; SINK-NEXT: addi a3, a2, -1 +; SINK-NEXT: andi a4, a3, 1024 +; SINK-NEXT: xori a3, a4, 1024 +; SINK-NEXT: mv a5, a0 +; SINK-NEXT: mv a6, a3 +; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; SINK-NEXT: .LBB4_3: # %vector.body ; SINK-NEXT: # =>This Inner Loop Header: Depth=1 -; SINK-NEXT: slli a5, a4, 2 -; SINK-NEXT: add a5, a0, a5 ; SINK-NEXT: vl1re32.v v8, (a5) ; SINK-NEXT: vfadd.vf v8, v8, fa0 -; SINK-NEXT: add a4, a4, a1 ; SINK-NEXT: vs1r.v v8, (a5) -; SINK-NEXT: bne a4, a2, .LBB4_3 +; SINK-NEXT: sub a6, a6, a2 +; SINK-NEXT: add a5, a5, a1 +; SINK-NEXT: bnez a6, .LBB4_3 ; SINK-NEXT: # %bb.4: # %middle.block -; SINK-NEXT: beqz a3, .LBB4_7 +; SINK-NEXT: beqz a4, .LBB4_7 ; SINK-NEXT: .LBB4_5: # %for.body.preheader -; SINK-NEXT: slli a1, a2, 2 +; SINK-NEXT: slli a1, a3, 2 ; SINK-NEXT: add a1, a0, a1 ; SINK-NEXT: lui a2, 1 ; SINK-NEXT: add a0, a0, a2 @@ -487,31 +490,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; DEFAULT-LABEL: sink_splat_fadd_scalable: ; DEFAULT: # %bb.0: # %entry ; DEFAULT-NEXT: csrr a1, vlenb -; DEFAULT-NEXT: srli a1, a1, 2 -; DEFAULT-NEXT: li a2, 1024 -; DEFAULT-NEXT: bgeu a2, a1, .LBB4_2 +; DEFAULT-NEXT: srli a2, a1, 2 +; DEFAULT-NEXT: li a3, 1024 +; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2 ; DEFAULT-NEXT: # %bb.1: -; DEFAULT-NEXT: li a2, 0 +; DEFAULT-NEXT: li a3, 0 ; DEFAULT-NEXT: j .LBB4_5 ; DEFAULT-NEXT: .LBB4_2: # %vector.ph -; DEFAULT-NEXT: li a4, 0 -; DEFAULT-NEXT: addi a2, a1, -1 -; DEFAULT-NEXT: andi a3, a2, 1024 -; DEFAULT-NEXT: xori a2, a3, 1024 -; DEFAULT-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; DEFAULT-NEXT: addi a3, a2, -1 +; DEFAULT-NEXT: andi a4, a3, 1024 +; DEFAULT-NEXT: xori a3, a4, 1024 +; DEFAULT-NEXT: mv a5, a0 +; DEFAULT-NEXT: mv a6, a3 +; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; DEFAULT-NEXT: .LBB4_3: # %vector.body ; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1 -; DEFAULT-NEXT: slli a5, a4, 2 -; DEFAULT-NEXT: add a5, a0, a5 ; DEFAULT-NEXT: vl1re32.v v8, (a5) ; DEFAULT-NEXT: vfadd.vf v8, v8, fa0 -; DEFAULT-NEXT: add a4, a4, a1 ; DEFAULT-NEXT: vs1r.v v8, (a5) -; DEFAULT-NEXT: bne a4, a2, .LBB4_3 +; DEFAULT-NEXT: sub a6, a6, a2 +; DEFAULT-NEXT: add a5, a5, a1 +; DEFAULT-NEXT: bnez a6, .LBB4_3 ; DEFAULT-NEXT: # %bb.4: # %middle.block -; DEFAULT-NEXT: beqz a3, .LBB4_7 +; DEFAULT-NEXT: beqz a4, .LBB4_7 ; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader -; DEFAULT-NEXT: slli a1, a2, 2 +; DEFAULT-NEXT: slli a1, a3, 2 ; DEFAULT-NEXT: add a1, a0, a1 ; DEFAULT-NEXT: lui a2, 1 ; DEFAULT-NEXT: add a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll index c95301809375c..3cb3c94d4e1f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll @@ -7,106 +7,143 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -112 +; CHECK-NEXT: .cfi_def_cfa_offset 112 +; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 ; CHECK-NEXT: .cfi_offset s3, -40 -; CHECK-NEXT: li a7, 0 -; CHECK-NEXT: ld s2, 48(sp) +; CHECK-NEXT: .cfi_offset s4, -48 +; CHECK-NEXT: .cfi_offset s5, -56 +; CHECK-NEXT: .cfi_offset s6, -64 +; CHECK-NEXT: .cfi_offset s7, -72 +; CHECK-NEXT: .cfi_offset s8, -80 +; CHECK-NEXT: .cfi_offset s9, -88 +; CHECK-NEXT: .cfi_offset s10, -96 +; CHECK-NEXT: .cfi_offset s11, -104 +; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li a7, 8 +; CHECK-NEXT: li t0, 12 +; CHECK-NEXT: li s0, 4 +; CHECK-NEXT: li t1, 20 +; CHECK-NEXT: ld a1, 112(sp) +; CHECK-NEXT: sd a1, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: andi s3, a0, 1 -; CHECK-NEXT: andi t1, a2, 1 -; CHECK-NEXT: andi a6, a4, 1 +; CHECK-NEXT: andi t3, a4, 1 +; CHECK-NEXT: li t2, 4 ; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_2 Depth 2 ; CHECK-NEXT: # Child Loop BB0_3 Depth 3 ; CHECK-NEXT: # Child Loop BB0_4 Depth 4 ; CHECK-NEXT: # Child Loop BB0_5 Depth 5 -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: mv t4, t1 +; CHECK-NEXT: mv t5, t2 +; CHECK-NEXT: mv t6, t0 +; CHECK-NEXT: mv s3, a7 +; CHECK-NEXT: mv a6, s2 ; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Loop Header: Depth=2 ; CHECK-NEXT: # Child Loop BB0_3 Depth 3 ; CHECK-NEXT: # Child Loop BB0_4 Depth 4 ; CHECK-NEXT: # Child Loop BB0_5 Depth 5 -; CHECK-NEXT: li t3, 0 -; CHECK-NEXT: add t2, t0, a7 +; CHECK-NEXT: mv s5, t4 +; CHECK-NEXT: mv s6, t5 +; CHECK-NEXT: mv s7, t6 +; CHECK-NEXT: mv s8, s3 +; CHECK-NEXT: mv s4, a6 ; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 ; CHECK-NEXT: # => This Loop Header: Depth=3 ; CHECK-NEXT: # Child Loop BB0_4 Depth 4 ; CHECK-NEXT: # Child Loop BB0_5 Depth 5 -; CHECK-NEXT: li t5, 0 -; CHECK-NEXT: add t4, t2, t3 +; CHECK-NEXT: mv s11, s5 +; CHECK-NEXT: mv a3, s6 +; CHECK-NEXT: mv ra, s7 +; CHECK-NEXT: mv a4, s8 +; CHECK-NEXT: mv s9, s4 ; CHECK-NEXT: .LBB0_4: # %vector.ph.i ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 ; CHECK-NEXT: # => This Loop Header: Depth=4 ; CHECK-NEXT: # Child Loop BB0_5 Depth 5 -; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: add a2, t4, t5 -; CHECK-NEXT: addi a0, a2, 2 -; CHECK-NEXT: addi a3, a2, 3 -; CHECK-NEXT: addi a1, a2, 1 -; CHECK-NEXT: addi a4, a2, 5 -; CHECK-NEXT: li a5, 1 +; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: .LBB0_5: # %vector.body.i ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # Parent Loop BB0_2 Depth=2 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=3 ; CHECK-NEXT: # Parent Loop BB0_4 Depth=4 ; CHECK-NEXT: # => This Inner Loop Header: Depth=5 -; CHECK-NEXT: mv t6, s1 -; CHECK-NEXT: addi a2, a2, 1 -; CHECK-NEXT: addi a5, a5, 1 -; CHECK-NEXT: slli s1, a0, 2 -; CHECK-NEXT: addi a0, a0, 1 -; CHECK-NEXT: addi a3, a3, 1 -; CHECK-NEXT: slli s0, a1, 2 -; CHECK-NEXT: addi a1, a1, 1 -; CHECK-NEXT: addi a4, a4, 1 -; CHECK-NEXT: vse32.v v8, (s1), v0.t -; CHECK-NEXT: vse32.v v8, (s0), v0.t -; CHECK-NEXT: addi s1, t6, 1 -; CHECK-NEXT: bnez t6, .LBB0_5 +; CHECK-NEXT: addi s1, a5, 4 +; CHECK-NEXT: add a1, a4, a5 +; CHECK-NEXT: vse32.v v8, (a1), v0.t +; CHECK-NEXT: add a5, a5, a3 +; CHECK-NEXT: vse32.v v8, (a5), v0.t +; CHECK-NEXT: mv a5, s1 +; CHECK-NEXT: bne s1, s0, .LBB0_5 ; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i ; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4 -; CHECK-NEXT: addi t5, t5, 1 -; CHECK-NEXT: beqz s3, .LBB0_4 +; CHECK-NEXT: addi s9, s9, 4 +; CHECK-NEXT: addi a4, a4, 4 +; CHECK-NEXT: addi ra, ra, 4 +; CHECK-NEXT: addi a3, a3, 4 +; CHECK-NEXT: andi s10, a0, 1 +; CHECK-NEXT: addi s11, s11, 4 +; CHECK-NEXT: beqz s10, .LBB0_4 ; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3 -; CHECK-NEXT: addi t3, t3, 1 -; CHECK-NEXT: beqz t1, .LBB0_3 +; CHECK-NEXT: addi s4, s4, 4 +; CHECK-NEXT: addi s8, s8, 4 +; CHECK-NEXT: addi s7, s7, 4 +; CHECK-NEXT: addi s6, s6, 4 +; CHECK-NEXT: andi a1, a2, 1 +; CHECK-NEXT: addi s5, s5, 4 +; CHECK-NEXT: beqz a1, .LBB0_3 ; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2 -; CHECK-NEXT: addi t0, t0, 1 -; CHECK-NEXT: beqz a6, .LBB0_2 +; CHECK-NEXT: addi a6, a6, 4 +; CHECK-NEXT: addi s3, s3, 4 +; CHECK-NEXT: addi t6, t6, 4 +; CHECK-NEXT: addi t5, t5, 4 +; CHECK-NEXT: addi t4, t4, 4 +; CHECK-NEXT: beqz t3, .LBB0_2 ; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: addi a7, a7, 1 -; CHECK-NEXT: beqz t1, .LBB0_1 +; CHECK-NEXT: addi s2, s2, 4 +; CHECK-NEXT: addi a7, a7, 4 +; CHECK-NEXT: addi t0, t0, 4 +; CHECK-NEXT: addi t2, t2, 4 +; CHECK-NEXT: addi t1, t1, 4 +; CHECK-NEXT: beqz a1, .LBB0_1 ; CHECK-NEXT: # %bb.10: # %l.exit ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: jalr a0 -; CHECK-NEXT: beqz s3, .LBB0_12 +; CHECK-NEXT: beqz s10, .LBB0_12 ; CHECK-NEXT: .LBB0_11: # %for.body7.us.14 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: j .LBB0_11 ; CHECK-NEXT: .LBB0_12: # %for.body7.us.19 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v8, s2 +; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v16, v8, 1 @@ -116,12 +153,20 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscal ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: sb a0, 0(zero) ; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 112 ; CHECK-NEXT: ret entry: %0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 7742cfc7da640..d1c98f828e76d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -243,28 +243,29 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB7_5 ; CHECK-NEXT: .LBB7_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB7_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vmul.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB7_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB7_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB7_7 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader @@ -333,28 +334,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB8_5 ; CHECK-NEXT: .LBB8_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB8_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vadd.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB8_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB8_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB8_7 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader @@ -423,28 +425,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB9_5 ; CHECK-NEXT: .LBB9_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB9_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vsub.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB9_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB9_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB9_7 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader @@ -513,28 +516,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB10_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB10_5 ; CHECK-NEXT: .LBB10_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB10_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vrsub.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB10_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB10_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB10_7 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader @@ -603,28 +607,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB11_5 ; CHECK-NEXT: .LBB11_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB11_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB11_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB11_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB11_7 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader @@ -693,28 +698,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB12_5 ; CHECK-NEXT: .LBB12_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB12_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vor.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB12_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB12_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB12_7 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader @@ -783,28 +789,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB13_5 ; CHECK-NEXT: .LBB13_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB13_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB13_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB13_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB13_7 ; CHECK-NEXT: .LBB13_5: # %for.body.preheader @@ -975,28 +982,29 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_shl_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB17_5 ; CHECK-NEXT: .LBB17_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB17_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vsll.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB17_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB17_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB17_7 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader @@ -1065,28 +1073,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB18_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB18_5 ; CHECK-NEXT: .LBB18_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB18_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB18_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB18_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB18_7 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader @@ -1155,32 +1164,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_ashr_scalable(ptr nocapture %a) { ; CHECK-LABEL: sink_splat_ashr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 1 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB19_2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: srli a2, a4, 1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a2, .LBB19_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB19_5 ; CHECK-NEXT: .LBB19_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma +; CHECK-NEXT: addi a1, a2, -1 +; CHECK-NEXT: andi a3, a1, 1024 +; CHECK-NEXT: xori a1, a3, 1024 +; CHECK-NEXT: slli a4, a4, 1 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB19_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl2re32.v v8, (a5) ; CHECK-NEXT: vsra.vi v8, v8, 2 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs2r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB19_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a4 +; CHECK-NEXT: bnez a6, .LBB19_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a3, .LBB19_7 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1448,31 +1458,31 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB26_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB26_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB26_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB26_7 +; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1538,31 +1548,31 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB27_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB27_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB27_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB27_7 +; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1628,31 +1638,31 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB28_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB28_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB28_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB28_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB28_7 +; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1718,31 +1728,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB29_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB29_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB29_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB29_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB29_7 +; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1808,31 +1818,31 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB30_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB30_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB30_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB30_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB30_7 +; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -1898,31 +1908,31 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a1, .LBB31_2 +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB31_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph -; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: andi a3, a2, 1024 -; CHECK-NEXT: xori a2, a3, 1024 -; CHECK-NEXT: vsetvli a5, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a3, a2, -1 +; CHECK-NEXT: andi a4, a3, 1024 +; CHECK-NEXT: xori a3, a4, 1024 +; CHECK-NEXT: mv a5, a0 +; CHECK-NEXT: mv a6, a3 +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a5, a4, 2 -; CHECK-NEXT: add a5, a0, a5 ; CHECK-NEXT: vl1re32.v v8, (a5) ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: bne a4, a2, .LBB31_3 +; CHECK-NEXT: sub a6, a6, a2 +; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: bnez a6, .LBB31_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a3, .LBB31_7 +; CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a3, 2 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a0, a0, a2 @@ -2064,35 +2074,36 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 2 -; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a2, .LBB34_2 +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: bgeu a4, a3, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: addi a3, a2, -1 -; CHECK-NEXT: andi a4, a3, 1024 -; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a4, a3, -1 +; CHECK-NEXT: andi a5, a4, 1024 +; CHECK-NEXT: xori a4, a5, 1024 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv t0, a4 +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a7, a0, a6 -; CHECK-NEXT: vl1re32.v v8, (a7) -; CHECK-NEXT: add a6, a1, a6 -; CHECK-NEXT: vl1re32.v v9, (a6) +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: vs1r.v v9, (a7) -; CHECK-NEXT: bne a5, a3, .LBB34_3 +; CHECK-NEXT: vs1r.v v9, (a6) +; CHECK-NEXT: sub t0, t0, a3 +; CHECK-NEXT: add a7, a7, a2 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: bnez t0, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a4, .LBB34_7 +; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: slli a3, a3, 2 -; CHECK-NEXT: add a2, a1, a3 -; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: slli a4, a4, 2 +; CHECK-NEXT: add a2, a1, a4 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: .LBB34_6: # %for.body @@ -2163,35 +2174,36 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 2 -; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a2, .LBB35_2 +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: bgeu a4, a3, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: addi a3, a2, -1 -; CHECK-NEXT: andi a4, a3, 1024 -; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m1, ta, ma +; CHECK-NEXT: addi a4, a3, -1 +; CHECK-NEXT: andi a5, a4, 1024 +; CHECK-NEXT: xori a4, a5, 1024 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv t0, a4 +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a7, a0, a6 -; CHECK-NEXT: vl1re32.v v8, (a7) -; CHECK-NEXT: add a6, a1, a6 -; CHECK-NEXT: vl1re32.v v9, (a6) +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 -; CHECK-NEXT: add a5, a5, a2 -; CHECK-NEXT: vs1r.v v9, (a7) -; CHECK-NEXT: bne a5, a3, .LBB35_3 +; CHECK-NEXT: vs1r.v v9, (a6) +; CHECK-NEXT: sub t0, t0, a3 +; CHECK-NEXT: add a7, a7, a2 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: bnez t0, .LBB35_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a4, .LBB35_7 +; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: slli a3, a3, 2 -; CHECK-NEXT: add a2, a1, a3 -; CHECK-NEXT: add a0, a0, a3 +; CHECK-NEXT: slli a4, a4, 2 +; CHECK-NEXT: add a2, a1, a4 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: .LBB35_6: # %for.body @@ -2474,28 +2486,29 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB42_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB42_5 ; CHECK-NEXT: .LBB42_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB42_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vdivu.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB42_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB42_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB42_7 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader @@ -2564,28 +2577,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB43_5 ; CHECK-NEXT: .LBB43_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB43_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vdiv.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB43_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB43_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB43_7 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader @@ -2654,28 +2668,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB44_5 ; CHECK-NEXT: .LBB44_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB44_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vremu.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB44_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB44_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB44_7 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader @@ -2744,28 +2759,29 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a2, a2, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a2, a5, 1 ; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: bgeu a3, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB45_5 ; CHECK-NEXT: .LBB45_2: # %vector.ph -; CHECK-NEXT: li a5, 0 ; CHECK-NEXT: addi a3, a2, -1 ; CHECK-NEXT: andi a4, a3, 1024 ; CHECK-NEXT: xori a3, a4, 1024 -; CHECK-NEXT: vsetvli a6, zero, e32, m2, ta, ma +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 +; CHECK-NEXT: mv a7, a3 +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma ; CHECK-NEXT: .LBB45_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a6, a5, 2 -; CHECK-NEXT: add a6, a0, a6 ; CHECK-NEXT: vl2re32.v v8, (a6) ; CHECK-NEXT: vrem.vx v8, v8, a1 -; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: vs2r.v v8, (a6) -; CHECK-NEXT: bne a5, a3, .LBB45_3 +; CHECK-NEXT: sub a7, a7, a2 +; CHECK-NEXT: add a6, a6, a5 +; CHECK-NEXT: bnez a7, .LBB45_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB45_7 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 15bbc1f9f6e35..f93022c9d132d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -635,17 +635,17 @@ define void @vlmax(i64 %N, ptr %c, ptr %a, ptr %b) { ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: vsetvli a6, zero, e64, m1, ta, ma +; CHECK-NEXT: slli a5, a6, 3 ; CHECK-NEXT: .LBB12_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a7, a4, 3 -; CHECK-NEXT: add a5, a2, a7 -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: add a5, a3, a7 -; CHECK-NEXT: vle64.v v9, (a5) +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v9, (a3) ; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: add a7, a7, a1 +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a4, a4, a6 -; CHECK-NEXT: vse64.v v8, (a7) +; CHECK-NEXT: add a1, a1, a5 +; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: add a2, a2, a5 ; CHECK-NEXT: blt a4, a0, .LBB12_2 ; CHECK-NEXT: .LBB12_3: # %for.end ; CHECK-NEXT: ret @@ -682,13 +682,13 @@ define void @vector_init_vlmax(i64 %N, ptr %c) { ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma +; CHECK-NEXT: slli a4, a3, 3 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB13_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: add a4, a4, a1 +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: add a2, a2, a3 -; CHECK-NEXT: vse64.v v8, (a4) +; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: blt a2, a0, .LBB13_2 ; CHECK-NEXT: .LBB13_3: # %for.end ; CHECK-NEXT: ret @@ -718,15 +718,15 @@ define void @vector_init_vsetvli_N(i64 %N, ptr %c) { ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetvli a3, a0, e64, m1, ta, ma -; CHECK-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB14_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: add a4, a4, a1 -; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v8, (a4) +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: blt a2, a0, .LBB14_2 ; CHECK-NEXT: .LBB14_3: # %for.end ; CHECK-NEXT: ret @@ -754,15 +754,15 @@ define void @vector_init_vsetvli_fv(i64 %N, ptr %c) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: vsetivli a3, 4, e64, m1, ta, ma -; CHECK-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: vsetvli a5, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB15_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: add a4, a4, a1 -; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v8, (a4) +; CHECK-NEXT: vse64.v v8, (a1) +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: blt a2, a0, .LBB15_1 ; CHECK-NEXT: # %bb.2: # %for.end ; CHECK-NEXT: ret diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll index c4558a55e729f..a8446c5103176 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll @@ -357,11 +357,11 @@ define void @loop_invariant_definition(i64 %arg) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[T1:%.*]] ; CHECK: t1: -; CHECK-NEXT: [[T2:%.*]] = phi i64 [ [[T3:%.*]], [[T1]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[T3]] = add nuw i64 [[T2]], 1 +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[T1]] ], [ -1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1 ; CHECK-NEXT: br i1 true, label [[T4:%.*]], label [[T1]] ; CHECK: t4: -; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T2]] to i32 +; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i32 ; CHECK-NEXT: [[T6:%.*]] = add i32 [[T5]], 1 ; CHECK-NEXT: [[T7:%.*]] = icmp eq i32 [[T5]], [[T6]] ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll index 18ab64758e49e..9c11bd064ad47 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll @@ -38,14 +38,14 @@ exit: ; preds = %loop define void @test2(ptr %a) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 128000 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]] -; CHECK-NEXT: store float 1.000000e+00, ptr [[T19]], align 4 -; CHECK-NEXT: [[T20]] = add nuw nsw i64 [[T15]], 1 -; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[T20]], 32000 -; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store float 1.000000e+00, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP2]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use(ptr [[A]]) ; CHECK-NEXT: ret void @@ -107,17 +107,18 @@ exit: ; preds = %loop define void @test4(ptr %a, ptr %b) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 128000 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[T15:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[T20:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds [32000 x float], ptr [[A:%.*]], i64 0, i64 [[T15]] -; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[T16]], align 4 +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], [[LOOP]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[B]], [[ENTRY]] ] +; CHECK-NEXT: [[T17:%.*]] = load float, ptr [[LSR_IV2]], align 4 ; CHECK-NEXT: [[T18:%.*]] = fadd float [[T17]], 1.000000e+00 -; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [32000 x float], ptr [[B:%.*]], i64 0, i64 [[T15]] -; CHECK-NEXT: store float [[T18]], ptr [[T19]], align 4 -; CHECK-NEXT: [[T20]] = add nuw nsw i64 [[T15]], 1 -; CHECK-NEXT: [[T21:%.*]] = icmp eq i64 [[T20]], 32000 -; CHECK-NEXT: br i1 [[T21]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: store float [[T18]], ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[SCEVGEP]], [[SCEVGEP4]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: call void @use(ptr [[A]]) ; CHECK-NEXT: call void @use(ptr [[B]]) diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll index 6b25aa5efd508..8d9d43202f0d9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution-dbg-msg.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts ; RUN: llc < %s -O3 -mattr=+v -debug -lsr-drop-solution 2>&1 | FileCheck --check-prefix=DEBUG %s +; RUN: llc < %s -O3 -mattr=+v -debug 2>&1 | FileCheck --check-prefix=DEBUG2 %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64-unknown-linux-gnu" @@ -9,6 +10,7 @@ define ptr @foo(ptr %a0, ptr %a1, i64 %a2) { ;DEBUG: The chosen solution requires 3 instructions 6 regs, with addrec cost 1, plus 2 base adds, plus 5 setup cost ;DEBUG: Baseline is more profitable than chosen solution, dropping LSR solution. +;DEBUG2: Baseline is more profitable than chosen solution, add option 'lsr-drop-solution' to drop LSR solution. entry: %0 = ptrtoint ptr %a0 to i64 %1 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %a2, i64 0, i64 3) diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll index aa7781e4374bf..4914bb72d8945 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll @@ -20,33 +20,53 @@ define i32 @main() { ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[CALL]], align 4 ; CHECK-NEXT: ret i32 0 ; CHECK: [[BB2]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[ADD:%.*]], %[[BB2]] ] -; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr [[STRUCT:%.*]], ptr [[CALL]], i64 [[PHI]] -; CHECK-NEXT: [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 8 +; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[BB:%.*]] ] +; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[BB]] ] +; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[BB]] ] +; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[BB]] ] +; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[BB]] ] +; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[BB]] ] +; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[BB]] ] +; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[BB]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 40, [[BB]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 48, [[BB]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 72, [[BB]] ] +; CHECK-NEXT: [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV30]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP32]], align 8 -; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 12 +; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV27]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP29]], align 4 -; CHECK-NEXT: [[SCEVGEP26:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 16 +; CHECK-NEXT: [[SCEVGEP26:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV24]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP26]], align 8 -; CHECK-NEXT: [[SCEVGEP23:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 20 +; CHECK-NEXT: [[SCEVGEP23:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV21]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP23]], align 4 -; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 24 +; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV18]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP20]], align 8 -; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 28 +; CHECK-NEXT: [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV15]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP17]], align 4 -; CHECK-NEXT: [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 32 +; CHECK-NEXT: [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV12]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP14]], align 8 -; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 36 +; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV9]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP11]], align 4 -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 40 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP6]], align 8 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 48 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP3]], align 8 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 72 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP]], align 8 -; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR]], i64 80 +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] +; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 40 ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP8]], align 8 -; CHECK-NEXT: [[ADD]] = add i64 [[PHI]], 1 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT5]] = add i64 [[LSR_IV4]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT10]] = add i64 [[LSR_IV9]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT13]] = add i64 [[LSR_IV12]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT16]] = add i64 [[LSR_IV15]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT19]] = add i64 [[LSR_IV18]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT22]] = add i64 [[LSR_IV21]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT25]] = add i64 [[LSR_IV24]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT28]] = add i64 [[LSR_IV27]], 88 +; CHECK-NEXT: [[LSR_IV_NEXT31]] = add i64 [[LSR_IV30]], 88 ; CHECK-NEXT: br label %[[BB2]] ; 0: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits