llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-risc-v Author: Pengcheng Wang (wangpc-pp) <details> <summary>Changes</summary> This can help reduce register pressure for LMUL>1 cases. --- Patch is 202.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115843.diff 40 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVSubtarget.cpp (+4) - (modified) llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll (+22-23) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll (+16-16) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll (+4-4) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+10-10) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+121-133) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll (+9-10) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+26-26) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll (+12-12) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll (+18-16) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll (+1-1) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll (+127-116) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll (+18-18) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll (+59-70) - (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll (+14-14) - (modified) llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll (+212-244) - (modified) 
llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll (+30-30) - (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+10-10) - (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+22-22) - (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll (+4-4) - (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+108-44) - (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll (+16-24) - (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll (+4-8) - (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll (+4-8) - (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll (+18-26) - (modified) llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll (+16-16) ``````````diff diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 03397e1e0d89ee..3eae2b9774203f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -211,4 +211,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // Spilling is generally expensive on all RISC-V cores, so always enable // register-pressure tracking. This will increase compile time. 
Policy.ShouldTrackPressure = true; + + // Enabling ShouldTrackLaneMasks when vector instructions are supported. + // TODO: Add extensions that need register pairs as well? + Policy.ShouldTrackLaneMasks = hasVInstructions(); } diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 26e86d41176e04..5a38ec36068f93 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -24,31 +24,31 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v14, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill 
+; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40) ; CHECK-NEXT: #APP @@ -58,27 +58,26 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, 1048572 ; CHECK-NEXT: addi a0, a0, 928 ; CHECK-NEXT: vmsbc.vx v0, v8, a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v12, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v14, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44) +; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vl2r.v v12, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: vl2r.v v14, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vle16.v v14, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v14, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu -; CHECK-NEXT: vsext.vf2 v8, v14, v0.t -; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44) -; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44) -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v14, (a0) +; CHECK-NEXT: vsext.vf2 v8, v16, v0.t ; CHECK-NEXT: lui a0, %hi(var_47) ; CHECK-NEXT: addi a0, a0, %lo(var_47) ; CHECK-NEXT: 
vsseg4e16.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index ce83e2d8a62206..fea88673084a29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -16,33 +16,33 @@ define <512 x i8> @single_source(<512 x i8> %a) { ; CHECK-NEXT: addi s0, sp, 1536 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: andi sp, sp, -512 -; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: li a0, 512 ; CHECK-NEXT: addi a1, sp, 512 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv.x.s a2, v16 -; CHECK-NEXT: vslidedown.vi v24, v16, 5 +; CHECK-NEXT: vmv.x.s a2, v8 +; CHECK-NEXT: vslidedown.vi v24, v8, 5 ; CHECK-NEXT: li a3, 432 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vse8.v v8, (a1) -; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: lbu a0, 770(sp) -; CHECK-NEXT: li a1, 431 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: lbu a0, 1012(sp) +; CHECK-NEXT: li a0, 431 +; CHECK-NEXT: vmv.v.x v16, a2 +; CHECK-NEXT: lbu a1, 770(sp) +; CHECK-NEXT: vslide1down.vx v16, v16, a1 +; CHECK-NEXT: lbu a1, 1012(sp) ; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a1 +; CHECK-NEXT: vslideup.vx v16, v24, a0 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v16, 4 -; CHECK-NEXT: li a1, 466 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: li a0, 465 +; CHECK-NEXT: vslidedown.vi v24, v8, 4 +; CHECK-NEXT: li a0, 466 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: li a1, 465 ; CHECK-NEXT: li a2, 501 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v16, v24, a1 ; CHECK-NEXT: li a0, 500 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v16, v8, a0 +; CHECK-NEXT: 
vmv8r.v v8, v16 ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index dbbb8362144cab..b94a523e130440 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -502,17 +502,17 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32 ; CHECK-NEXT: addi a1, a1, 25 ; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: addi a3, a3, 2047 -; CHECK-NEXT: addi a3, a3, 308 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: addi a0, a3, 308 ; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 -; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 7 ; CHECK-NEXT: ret @@ -534,15 +534,15 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 ; CHECK-NEXT: addi a1, a1, 25 ; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: addi a3, a3, 2047 -; CHECK-NEXT: addi a3, a3, 308 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: addi a0, a3, 308 ; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index ae5dbfa4bf30ba..6a8d98d55289bf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -17,25 +17,25 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vlm.v v8, (a0) ; CHECK-NEXT: li a0, -256 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v11, v9, v9 +; CHECK-NEXT: vadd.vv v12, v9, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vi v12, v11, -16 +; CHECK-NEXT: vadd.vi v13, v12, -16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vadd.vi v11, v11, -15 -; CHECK-NEXT: vmerge.vim v13, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vadd.vi v12, v12, -15 ; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vnsrl.wi v8, v14, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vrgather.vv v8, v13, v12, v0.t -; CHECK-NEXT: vnsrl.wi v12, v14, 8 +; CHECK-NEXT: vrgather.vv v8, v14, v13, v0.t +; CHECK-NEXT: vnsrl.wi v13, v10, 8 ; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vrgather.vv v12, v13, v11, v0.t -; CHECK-NEXT: vmsne.vi v8, v12, 0 +; CHECK-NEXT: vrgather.vv v13, v14, v12, v0.t +; CHECK-NEXT: vmsne.vi v8, v13, 0 ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret %vec = load <32 x i1>, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b0f8bc9dcc6bd5..127428f8d5a299 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1266,19 +1266,16 @@ define <32 x double> 
@buildvec_v32f64(double %e0, double %e1, double %e2, double define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) vscale_range(2,2) { ; RV32-LABEL: buildvec_v32f64_exact_vlen: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -96 -; RV32-NEXT: .cfi_def_cfa_offset 96 -; RV32-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill -; RV32-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill +; RV32-NEXT: addi sp, sp, -80 +; RV32-NEXT: .cfi_def_cfa_offset 80 +; RV32-NEXT: fsd fs0, 72(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs1, 64(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs2, 56(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs3, 48(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs4, 40(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs5, 32(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs6, 24(sp) # 8-byte Folded Spill +; RV32-NEXT: fsd fs7, 16(sp) # 8-byte Folded Spill ; RV32-NEXT: .cfi_offset fs0, -8 ; RV32-NEXT: .cfi_offset fs1, -16 ; RV32-NEXT: .cfi_offset fs2, -24 @@ -1287,85 +1284,79 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; RV32-NEXT: .cfi_offset fs5, -48 ; RV32-NEXT: .cfi_offset fs6, -56 ; 
RV32-NEXT: .cfi_offset fs7, -64 -; RV32-NEXT: .cfi_offset fs8, -72 -; RV32-NEXT: .cfi_offset fs9, -80 -; RV32-NEXT: .cfi_offset fs10, -88 -; RV32-NEXT: sw a6, 0(sp) -; RV32-NEXT: sw a7, 4(sp) -; RV32-NEXT: fld ft0, 248(sp) -; RV32-NEXT: fld ft1, 240(sp) -; RV32-NEXT: fld ft2, 232(sp) -; RV32-NEXT: fld ft3, 224(sp) -; RV32-NEXT: fld ft6, 216(sp) -; RV32-NEXT: fld ft8, 208(sp) -; RV32-NEXT: fld ft10, 200(sp) -; RV32-NEXT: fld fs1, 192(sp) -; RV32-NEXT: fld ft11, 184(sp) -; RV32-NEXT: fld fs4, 176(sp) -; RV32-NEXT: fld fs2, 168(sp) -; RV32-NEXT: fld fs5, 160(sp) -; RV32-NEXT: fld fs3, 136(sp) -; RV32-NEXT: fld fs6, 128(sp) -; RV32-NEXT: fld fs7, 152(sp) -; RV32-NEXT: fld fs8, 144(sp) -; RV32-NEXT: fld ft4, 120(sp) -; RV32-NEXT: fld ft5, 112(sp) -; RV32-NEXT: fld ft7, 104(sp) -; RV32-NEXT: fld ft9, 96(sp) +; RV32-NEXT: sw a6, 8(sp) +; RV32-NEXT: sw a7, 12(sp) +; RV32-NEXT: fld ft0, 232(sp) +; RV32-NEXT: fld ft4, 224(sp) +; RV32-NEXT: fld ft1, 216(sp) +; RV32-NEXT: fld ft7, 208(sp) +; RV32-NEXT: fld ft2, 200(sp) +; RV32-NEXT: fld ft10, 192(sp) +; RV32-NEXT: fld ft3, 184(sp) +; RV32-NEXT: fld fs1, 176(sp) +; RV32-NEXT: fld ft5, 168(sp) +; RV32-NEXT: fld fs2, 160(sp) +; RV32-NEXT: fld ft6, 152(sp) +; RV32-NEXT: fld fs3, 144(sp) +; RV32-NEXT: fld ft8, 120(sp) +; RV32-NEXT: fld fs4, 112(sp) +; RV32-NEXT: fld ft9, 136(sp) +; RV32-NEXT: fld fs5, 128(sp) +; RV32-NEXT: fld ft11, 104(sp) +; RV32-NEXT: fld fs6, 96(sp) +; RV32-NEXT: fld fs0, 88(sp) +; RV32-NEXT: fld fs7, 80(sp) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vfmv.v.f v8, fa2 -; RV32-NEXT: fld fa2, 0(sp) -; RV32-NEXT: sw a4, 0(sp) -; RV32-NEXT: sw a5, 4(sp) -; RV32-NEXT: fld fs0, 0(sp) -; RV32-NEXT: sw a2, 0(sp) -; RV32-NEXT: sw a3, 4(sp) -; RV32-NEXT: fld fs9, 0(sp) -; RV32-NEXT: sw a0, 0(sp) -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: fld fs10, 0(sp) -; RV32-NEXT: vfmv.v.f v9, fs8 -; RV32-NEXT: vfmv.v.f v10, fs6 -; RV32-NEXT: vfmv.v.f v11, fs5 -; RV32-NEXT: vfmv.v.f v12, fs4 -; RV32-NEXT: vfmv.v.f v13, 
fs1 -; RV32-NEXT: vfslide1down.vf v17, v9, fs7 -; RV32-NEXT: vfslide1down.vf v16, v10, fs3 -; RV32-NEXT: vfslide1down.vf v18, v11, fs2 -; RV32-NEXT: vfmv.v.f v9, fs10 -; RV32-NEXT: vfslide1down.vf v19, v12, ft11 -; RV32-NEXT: vfslide1down.vf v20, v13, ft10 -; RV32-NEXT: vfslide1down.vf v12, v9, fs9 +; RV32-NEXT: vfmv.v.f v10, fa0 +; RV32-NEXT: vfmv.v.f v11, fa4 +; RV32-NEXT: vfmv.v.f v12, fa6 +; RV32-NEXT: fld fa4, 8(sp) +; RV32-NEXT: sw a4, 8(sp) +; RV32-NEXT: sw a5, 12(sp) ; RV32-NEXT: vfslide1down.vf v9, v8, fa3 -; RV32-NEXT: vfmv.v.f v8, ft8 -; RV32-NEXT: vfslide1down.vf v21, v8, ft6 -; RV32-NEXT: vfmv.v.f v8, fa0 -; RV32-NEXT: vfslide1down.vf v8, v8, fa1 -; RV32-NEXT: vfmv.v.f v10, ft3 -; RV32-NEXT: vfslide1down.vf v22, v10, ft2 -; RV32-NEXT: vfmv.v.f v10, fa4 -; RV32-NEXT: vfslide1down.vf v10, v10, fa5 -; RV32-NEXT: vfmv.v.f v11, fa6 -; RV32-NEXT: vfslide1down.vf v11, v11, fa7 -; RV32-NEXT: vfmv.v.f v13, fs0 -; RV32-NEXT: vfslide1down.vf v13, v13, fa2 -; RV32-NEXT: vfmv.v.f v14, ft9 -; RV32-NEXT: vfslide1down.vf v14, v14, ft7 -; RV32-NEXT: vfmv.v.f v15, ft5 -; RV32-NEXT: vfslide1down.vf v15, v15, ft4 -; RV32-NEXT: vfmv.v.f v23, ft1 -; RV32-NEXT: vfslide1down.vf v23, v23, ft0 -; RV32-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload -; RV32-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload +; RV32-NEXT: vfslide1down.vf v8, v10, fa1 +; RV32-NEXT: vfslide1down.vf v10, v11, fa5 +; RV32-NEXT: vfslide1down.vf v11, v12, fa7 +; RV32-NEXT: fld fa5, 8(sp) +; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: sw a3, 12(sp) 
+; RV32-NEXT: fld fa3, 8(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: fld fa2, 8(sp) +; RV32-NEXT: vfmv.v.f v12, fs7 +; RV32-NEXT: vfmv.v.f v13, fs6 +; RV32-NEXT: vfmv.v.f v16, fs5 +; RV32-NEXT: vfmv.v.f v18, fs4 +; RV32-NEXT: vfmv.v.f v19, fs3 +; RV32-NEXT: vfmv.v.f v20, fs2 +; RV32-NEXT: vfmv.v.f v21, fs1 +; RV32-NEXT: vfmv.v.f v22, ft10 +; RV32-NEXT: vfmv.v.f v23, ft7 +; RV32-NEXT: vfmv.v.f v24, ft4 +; RV32-NEXT: vfslide1down.vf v14, v12, fs0 +; RV32-NEXT: vfslide1down.vf v15, v13, ft11 +; RV32-NEXT: vfslide1down.vf v17, v16, ft9 +; RV32-NEXT: vfslide1down.vf v16, v18, ft8 +; RV32-NEXT: vfslide1down.vf v18, v19, ft6 +; RV32-NEXT: vfslide1down.vf v19, v20, ft5 +; RV32-NEXT: vfslide1down.vf v20, v21, ft3 +; RV32-NEXT: vfslide1down.vf v21, v22, ft2 +; RV32-NEXT: vfslide1down.vf v22, v23, ft1 +; RV32-NEXT: vfmv.v.f v12, fa5 +; RV32-NEXT: vfslide1down.vf v13, v12, fa4 +; RV32-NEXT: vfmv.v.f v12, fa2 +; RV32-NEXT: vfslide1down.vf v12, v12, fa3 +; RV32-NEXT: vfslide1down.vf v23, v24, ft0 +; RV32-NEXT: fld fs0, 72(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs1, 64(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs2, 56(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs3, 48(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs4, 40(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs5, 32(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs6, 24(sp) # 8-byte Folded Reload +; RV32-NEXT: fld fs7, 16(sp) # 8-byte Folded Reload ; RV32-NEXT: .cfi_restore fs0 ; RV32-NEXT: .cfi_restore fs1 ; RV32-NEXT: .cfi_restore fs2 @@ -1374,10 +1365,7 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; RV32-NEXT: .cfi_restore fs5 ; RV32-NEXT: .cfi_restore fs6 ; RV32-NEXT: .cfi_restore fs7 -; RV32-NEXT: .cfi_restore fs8 -; RV32-NEXT: .cfi_restore fs9 -; RV32-NEXT: .cfi_restore fs10 -; RV32-NEXT: addi sp, sp, 96 +; RV32-NEXT: addi sp, sp, 80 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1401,25 +1389,25 @@ define <32 x double> 
@buildvec_v32f64_exact_vlen(double %e0, double %e1, double ; RV64-NEXT: .cfi_offset fs5, -48 ; RV64-NEXT: .cfi_offset fs6, -56 ; RV64-NEXT: .cfi_offset fs7, -64 -; RV64-NEXT: fmv.d.x ft6, a7 -; RV64-NEXT: fmv.d.x ft9, a5 -; RV64-NEXT: fmv.d.x ft10, a3 -; RV64-NEXT: fmv.d.x ft11, a1 +; RV64-NEXT: fmv.d.x ft11, a7 +; RV64-NEXT: fmv.d.x fs0, a5 +; RV64-NEXT: fmv.d.x fs1, a3 +; RV64-NEXT: fmv.d.x fs2, a1 ; RV64-NEXT: fld ft0, 184(sp) -; RV64-NEXT: fld ft1, 176(sp) -; RV64-NEXT: fld ft2, 168(sp) -; RV64-NEXT: fld ft3, 160(sp) -; RV64-NEXT: fld ft4, 152(sp) -; RV64-NEXT: fld ft5, 144(sp) -; RV64-NEXT: fld ft7, 136(sp) -; RV64-NEXT: fld ft8, 128(sp) -; RV64-NEXT: fld fs0, 120(sp) -; RV64-NEXT... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/115843 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits