https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107093
--- Comment #4 from Hongtao.liu <crazylht at gmail dot com> --- Change "*k, CBC" to "?k, CBC" in *mov{qi,hi,si,di}_internal; then RA does a good job of choosing kxnor to set constm1_rtx in a mask register, and I got the code below with your attached patch (changing #if 0 to #if 1), which seems better than the original patch. 6foo: 7.LFB0: 8 .cfi_startproc 9 testl %edi, %edi 10 jle .L9 11 kxnorb %k1, %k1, %k1 12 cmpl $4, %edi 13 jl .L11 14.L3: 15 vbroadcastsd .LC2(%rip), %ymm3 16 vmovdqa .LC0(%rip), %xmm2 17 xorl %eax, %eax 18 xorl %ecx, %ecx 19 .p2align 4,,10 20 .p2align 3 21.L7: 22 vmovapd b(%rax), %ymm0{%k1} 23 addl $4, %ecx 24 movl %edi, %edx 25 vmulpd %ymm3, %ymm0, %ymm1 26 subl %ecx, %edx 27 cmpl $4, %edx 28 vmovapd %ymm1, a(%rax){%k1} 29 vpbroadcastd %edx, %xmm1 30 movl $-1, %edx 31 vpcmpd $1, %xmm1, %xmm2, %k1 32 kmovb %k1, %esi 33 cmovge %edx, %esi 34 addq $32, %rax 35 kmovb %esi, %k1 36 kortestb %k1, %k1 37 jne .L7 38 vzeroupper 39.L9: 40 ret 41 .p2align 4,,10 42 .p2align 3 43.L11: 44 vmovdqa .LC0(%rip), %xmm2 45 vpbroadcastd %edi, %xmm1 46 vpcmpd $1, %xmm1, %xmm2, %k1 47 jmp .L3 48 .cfi_endproc