https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118618
Bug ID: 118618 Summary: RISC-V: Zcmp extension and RVV auto-vectorization are both enabled,the sp register error. Product: gcc Version: 14.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: bigmagicreadsun at gmail dot com Target Milestone: --- I found that when both the RISC-V Zve32 and Zcmp extensions are enabled during compilation, RVV auto-vectorization is performed and RVV vector registers are also spilled to the stack, which requires an additional, vlenb-dependent adjustment of SP. However, it seems that Zcmp does not take this into account when adjusting SP. Here is a practical example. When I enable only Zve32 (-march=rv32imafdc_zve32f): 8000185e <core_bench_matrix>: 8000185e: 1101 c.addi sp,-32 80001860: c22022f3 csrrs t0,vlenb,zero 80001864: 00052803 lw a6,0(a0) 80001868: ca26 c.swsp s1,20(sp) 8000186a: ce06 c.swsp ra,28(sp) 8000186c: cc22 c.swsp s0,24(sp) 8000186e: c84a c.swsp s2,16(sp) 80001870: c64e c.swsp s3,12(sp) 80001872: c452 c.swsp s4,8(sp) 80001874: c256 c.swsp s5,4(sp) 80001876: 00229313 slli t1,t0,0x2 8000187a: 4144 c.lw s1,4(a0) 8000187c: 40610133 sub sp,sp,t1 80001880: 0a080c63 beq a6,zero,80001938 <core_bench_matrix+0xda> 80001884: 0c8077d7 vsetvli a5,zero,e16,m1,ta,ma 80001888: 5e05c157 vmv.v.x v2,a1 8000188c: 8526 c.mv a0,s1 8000188e: 00181a13 slli s4,a6,0x1 80001892: 4981 c.li s3,0 80001894: 86aa c.mv a3,a0 80001896: 8442 c.mv s0,a6 80001898: 872a c.mv a4,a0 8000189a: 0c8477d7 vsetvli a5,s0,e16,m1,ta,ma 8000189e: 0206d087 vle16.v v1,(a3) 800018a2: 00179613 slli a2,a5,0x1 800018a6: 8c1d c.sub s0,a5 800018a8: 96b2 c.add a3,a2 800018aa: 021100d7 vadd.vv v1,v1,v2 800018ae: 020750a7 vse16.v v1,(a4) 800018b2: 9732 c.add a4,a2 800018b4: f07d c.bnez s0,8000189a <core_bench_matrix+0x3c> 800018b6: 00198913 addi s2,s3,1 800018ba: 9552 c.add a0,s4 800018bc: 01280463 beq a6,s2,800018c4 <core_bench_matrix+0x66> 800018c0: 89ca c.mv s3,s2 800018c2: bfc9 c.j 80001894 <core_bench_matrix+0x36> 800018c4: c2202af3 csrrs s5,vlenb,zero 800018c8: 002a9793 slli 
a5,s5,0x2 800018cc: 415787b3 sub a5,a5,s5 800018d0: 978a c.add a5,sp 800018d2: 4581 c.li a1,0 800018d4: 4501 c.li a0,0 800018d6: 02878127 vs1r.v v2,(a5) 800018da: 7ce000ef jal ra,800020a8 <crcu8> 800018de: 002a9793 slli a5,s5,0x2 800018e2: 415787b3 sub a5,a5,s5 800018e6: 978a c.add a5,sp 800018e8: 0287d107 vl1re16.v v2,(a5) 800018ec: 8626 c.mv a2,s1 800018ee: 86ca c.mv a3,s2 800018f0: 8726 c.mv a4,s1 800018f2: 0c86f7d7 vsetvli a5,a3,e16,m1,ta,ma 800018f6: 02065087 vle16.v v1,(a2) 800018fa: 00179593 slli a1,a5,0x1 800018fe: 8e9d c.sub a3,a5 80001900: 962e c.add a2,a1 80001902: 021100d7 vadd.vv v1,v1,v2 80001906: 020750a7 vse16.v v1,(a4) 8000190a: 972e c.add a4,a1 8000190c: f2fd c.bnez a3,800018f2 <core_bench_matrix+0x94> 8000190e: 94d2 c.add s1,s4 80001910: 00140793 addi a5,s0,1 80001914: 00898463 beq s3,s0,8000191c <core_bench_matrix+0xbe> 80001918: 843e c.mv s0,a5 8000191a: bfc9 c.j 800018ec <core_bench_matrix+0x8e> 8000191c: c22022f3 csrrs t0,vlenb,zero 80001920: 00229313 slli t1,t0,0x2 80001924: 911a c.add sp,t1 80001926: 40f2 c.lwsp ra,28(sp) 80001928: 4462 c.lwsp s0,24(sp) 8000192a: 44d2 c.lwsp s1,20(sp) 8000192c: 4942 c.lwsp s2,16(sp) 8000192e: 49b2 c.lwsp s3,12(sp) 80001930: 4a22 c.lwsp s4,8(sp) 80001932: 4a92 c.lwsp s5,4(sp) 80001934: 6105 c.addi16sp sp,32 80001936: 8082 c.jr ra 80001938: c22022f3 csrrs t0,vlenb,zero 8000193c: 00229313 slli t1,t0,0x2 80001940: 911a c.add sp,t1 80001942: 40f2 c.lwsp ra,28(sp) 80001944: 4462 c.lwsp s0,24(sp) 80001946: 44d2 c.lwsp s1,20(sp) 80001948: 4942 c.lwsp s2,16(sp) 8000194a: 49b2 c.lwsp s3,12(sp) 8000194c: 4a22 c.lwsp s4,8(sp) 8000194e: 4a92 c.lwsp s5,4(sp) 80001950: 4581 c.li a1,0 80001952: 4501 c.li a0,0 80001954: 6105 c.addi16sp sp,32 80001956: 7520006f jal zero,800020a8 <crcu8> When both the Zve32 and Zc* extensions are enabled (-march=rv32imafdc__zve32f_zca_zcb_zcf_zcmp_zcmt): 80001706 <core_bench_matrix>: 80001706: b8a6 cm.push {ra,s0-s5},-48 80001708: 00052803 lw a6,0(a0) 8000170c: 4144 c.lw s1,4(a0) 8000170e: 08080e63 
beq a6,zero,800017aa <core_bench_matrix+0xa4> 80001712: 0c8077d7 vsetvli a5,zero,e16,m1,ta,ma 80001716: 5e05c157 vmv.v.x v2,a1 8000171a: 8526 c.mv a0,s1 8000171c: 00181a13 slli s4,a6,0x1 80001720: 4981 c.li s3,0 80001722: 86aa c.mv a3,a0 80001724: 8442 c.mv s0,a6 80001726: 872a c.mv a4,a0 80001728: 0c8477d7 vsetvli a5,s0,e16,m1,ta,ma 8000172c: 0206d087 vle16.v v1,(a3) 80001730: 00179613 slli a2,a5,0x1 80001734: 8c1d c.sub s0,a5 80001736: 96b2 c.add a3,a2 80001738: 021100d7 vadd.vv v1,v1,v2 8000173c: 020750a7 vse16.v v1,(a4) 80001740: 9732 c.add a4,a2 80001742: f07d c.bnez s0,80001728 <core_bench_matrix+0x22> 80001744: 00198913 addi s2,s3,1 80001748: 9552 c.add a0,s4 8000174a: 01280463 beq a6,s2,80001752 <core_bench_matrix+0x4c> 8000174e: 89ca c.mv s3,s2 80001750: bfc9 c.j 80001722 <core_bench_matrix+0x1c> 80001752: c2202af3 csrrs s5,vlenb,zero 80001756: 002a9793 slli a5,s5,0x2 8000175a: 415787b3 sub a5,a5,s5 8000175e: 978a c.add a5,sp 80001760: 4581 c.li a1,0 80001762: 4501 c.li a0,0 80001764: 02878127 vs1r.v v2,(a5) 80001768: 2739 c.jal 80001e76 <crcu8> 8000176a: 002a9793 slli a5,s5,0x2 8000176e: 415787b3 sub a5,a5,s5 80001772: 978a c.add a5,sp 80001774: 0287d107 vl1re16.v v2,(a5) 80001778: 8626 c.mv a2,s1 8000177a: 86ca c.mv a3,s2 8000177c: 8726 c.mv a4,s1 8000177e: 0c86f7d7 vsetvli a5,a3,e16,m1,ta,ma 80001782: 02065087 vle16.v v1,(a2) 80001786: 00179593 slli a1,a5,0x1 8000178a: 8e9d c.sub a3,a5 8000178c: 962e c.add a2,a1 8000178e: 021100d7 vadd.vv v1,v1,v2 80001792: 020750a7 vse16.v v1,(a4) 80001796: 972e c.add a4,a1 80001798: f2fd c.bnez a3,8000177e <core_bench_matrix+0x78> 8000179a: 94d2 c.add s1,s4 8000179c: 00140793 addi a5,s0,1 800017a0: 00898463 beq s3,s0,800017a8 <core_bench_matrix+0xa2> 800017a4: 843e c.mv s0,a5 800017a6: bfc9 c.j 80001778 <core_bench_matrix+0x72> 800017a8: bea6 cm.popret {ra,s0-s5},48 800017aa: baa6 cm.pop {ra,s0-s5},48 800017ac: 4581 c.li a1,0 800017ae: 4501 c.li a0,0 800017b0: a5d9 c.j 80001e76 <crcu8> It seems that Zcmp does not take 
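the presence of vector registers into account: unlike the Zve32-only build, the prologue after `cm.push` contains no `csrrs t0,vlenb` / `slli t1,t0,0x2` / `sub sp,sp,t1` sequence to reserve the vector spill area (and the epilogues contain no matching `c.add sp,t1`), yet `vs1r.v v2,(a5)` still stores v2 to sp + 3*vlenb as if that area had been allocated.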