================ @@ -403,236 +396,253 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmv1r.v v0, v13 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 84 +; RV32-NEXT: li a3, 80 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v4, v8, v24, v0 +; RV32-NEXT: vmerge.vvm v12, v8, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 28 +; RV32-NEXT: li a3, 20 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v14 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 76 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 68 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 76 +; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 84 +; RV32-NEXT: li a2, 80 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 68 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 32 ; RV32-NEXT: addi a1, a1, 4 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 36 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v20, v8, v16 +; RV32-NEXT: vrgatherei16.vv v16, v8, v12 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 52 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 84 +; RV32-NEXT: li a2, 80 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 48 ; RV32-NEXT: lui a2, %hi(.LCPI8_3) ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_3) ; RV32-NEXT: addi a1, a1, 5 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v28, (a2) +; RV32-NEXT: vle16.v v24, (a2) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v20, a1 +; RV32-NEXT: vmv.v.x v25, a1 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v12, v20 +; RV32-NEXT: vrgatherei16.vv v16, v8, v25 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 44 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 52 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v12, v28 +; RV32-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v16 +; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4) ; RV32-NEXT: lui a2, %hi(.LCPI8_5) ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v12, (a1) +; RV32-NEXT: vle16.v v26, (a1) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v28, (a2) +; RV32-NEXT: vle16.v v24, (a2) ; RV32-NEXT: lui a1, %hi(.LCPI8_6) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle16.v v30, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v16, v0, v12 +; RV32-NEXT: vle16.v v2, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 12 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v26 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v12, v20, v28 +; RV32-NEXT: vrgatherei16.vv v20, v4, v24 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v12, v16 +; RV32-NEXT: vmv.v.v v20, v8 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v0, v30 +; RV32-NEXT: vrgatherei16.vv v24, v8, v2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7) ; RV32-NEXT: lui a2, %hi(.LCPI8_8) ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: lui a1, %hi(.LCPI8_9) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v8, (a2) +; RV32-NEXT: vle16.v v16, (a2) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle16.v v10, (a1) +; RV32-NEXT: vle16.v v18, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 28 +; RV32-NEXT: li a2, 20 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v28, v0, v20 +; RV32-NEXT: vrgatherei16.vv v24, v0, v12 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ---------------- wangpc-pp wrote:
Ditto. https://github.com/llvm/llvm-project/pull/125533 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits