https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119554
Bug ID: 119554 Summary: [risc-v][bug] Unusual Behavior Observed with RISC-V Vector Extension (RVV) Product: gcc Version: 14.2.1 Status: UNCONFIRMED Severity: normal Priority: P3 Component: rtl-optimization Assignee: unassigned at gcc dot gnu.org Reporter: akhilesh.k at samsung dot com Target Milestone: --- Hello, During the RISC-V Vector Extension (RVV) performance test, I observed some unusual behavior. It appears that the vector instruction vslideup.vi is not functioning as expected on QEMU (when running with rvv_ta_all_1s=true, as the default seems to be disabled) as well as on the target with GCC 14.2. I am sharing a sample test application to reproduce this issue. #include<stdio.h> void test(char * counter) { unsigned int *ctr = (unsigned int *)counter; unsigned int c_in = 0; unsigned int c_out = 0; for(int i = 0; i < 4; i++) { c_out = ctr[i] >> 31; ctr[i] <<= 1; ctr[i] |= c_in; c_in = c_out; } if(c_out) { ctr[0] ^= 0x00000087; } } int main () { unsigned int counter[] = {0xe826d5bb, 0xc744010d, 0xccd28c99, 0xe3781d64}; test((char *)counter); printf("test result %x %x %x %x \n", counter[0], counter[1], counter[2], counter[3]); return 0; } akhilesh.k@$ ./output/host/bin/riscv64-buildroot-linux-gnu-gcc -march=rv64gcv -g -o test ~/Activity/rvv/test.c --static -O3 akhilesh.k@$ ##### test results with rvv_ta_all_1s option ######### QEMU_CPU=rv64,vlen=128,rvv_ta_all_1s=true,rvv_ma_all_1s=true,v=true,vext_spec=v1.0 ~/qemu_bin/bin/qemu-riscv64 test test result d04dabf1 ffffffff ffffffff ffffffff akhilesh.k@$ ####### Default test results ############### akhilesh.k$ ~/qemu_bin/bin/qemu-riscv64 test test result d04dabf1 8e88021b 99a51933 c6f03ac9 akhilesh.k$ It seems that the "Vector tail agnostic" bit is set by the compiler incorrectly. (gdb) c Continuing. 
Breakpoint 2.2, test (counter=0x7f1d2898afb8 "v\253M\320", '\377' <repeats 12 times>) at /home/akhilesh.k/Activity/rvv/test.c:30 30 ctr[0] ^= 0x00000087; (gdb) info reg v v0 {q = {0xffffffffffffffffffffffffffffff08}, l = {0xffffffffffffff08, 0xffffffffffffffff}, w = {0xffffff08, 0xffffffff, 0xffffffff, 0xffffffff}, s = {0xff08, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, b = {0x8, 0xff <repeats 15 times>}} v1 {q = {0xffffffffffffffffffffffffd04dab76}, l = {0xffffffffd04dab76, 0xffffffffffffffff}, w = {0xd04dab76, 0xffffffff, 0xffffffff, 0xffffffff}, s = {0xab76, 0xd04d, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff}, b = {0x76, 0xab, 0x4d, 0xd0, 0xff <repeats 12 times>}} v2 {q = {0xc6f03ac899a519328e88021ad04dab76}, l = {0x8e88021ad04dab76, 0xc6f03ac899a51932}, w = {0xd04dab76, 0x8e88021a, 0x99a51932, 0xc6f03ac8}, s = {0xab76, 0xd04d, 0x21a, 0x8e88, 0x1932, 0x99a5, 0x3ac8, 0xc6f0}, b = {0x76, 0xab, 0x4d, 0xd0, 0x1a, 0x2, 0x88, 0x8e, 0x32, 0x19, 0xa5, 0x99, 0xc8, 0x3a, 0xf0, 0xc6}} v3 {q = {0x1}, l = {0x1, 0x0}, w = {0x1, 0x0, 0x0, 0x0}, s = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x1, 0x0 <repeats 15 times>}} v4 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v5 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v6 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v7 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v8 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v9 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v10 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 
0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v11 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v12 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v13 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v14 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v15 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v16 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v17 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v18 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v19 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v20 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v21 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v22 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v23 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v24 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v25 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 
0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v26 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v27 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v28 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v29 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v30 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} v31 {q = {0x0}, l = {0x0, 0x0}, w = {0x0, 0x0, 0x0, 0x0}, s = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x0 <repeats 16 times>}} (gdb) p/x $vtype $1 = 0xd0 (gdb)