https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107949
Bug ID: 107949 Summary: PPC: Unnecessary rlwinm after lbzx Product: gcc Version: 12.2.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: jens.seifert at de dot ibm.com Target Milestone: --- extern unsigned char magic1[256]; unsigned int hash(const unsigned char inp[4]) { const unsigned long long INIT = 0x1ULL; unsigned long long h1 = INIT; h1 = magic1[((unsigned long long)inp[0]) ^ h1]; h1 = magic1[((unsigned long long)inp[1]) ^ h1]; h1 = magic1[((unsigned long long)inp[2]) ^ h1]; h1 = magic1[((unsigned long long)inp[3]) ^ h1]; return h1; } #ifdef __powerpc__ #define lbzx(b,c) ({ unsigned long long r; __asm__("lbzx %0,%1,%2":"=r"(r):"b"(b),"r"(c)); r; }) unsigned int hash2(const unsigned char inp[4]) { const unsigned long long INIT = 0x1ULL; unsigned long long h1 = INIT; h1 = lbzx(magic1, inp[0] ^ h1); h1 = lbzx(magic1, inp[1] ^ h1); h1 = lbzx(magic1, inp[2] ^ h1); h1 = lbzx(magic1, inp[3] ^ h1); return h1; } #endif Extra rlwinm instructions get added. 
hash(unsigned char const*): .LCF0: addi 2,2,.TOC.-.LCF0@l lbz 9,0(3) addis 10,2,.LC0@toc@ha ld 10,.LC0@toc@l(10) lbz 6,1(3) lbz 7,2(3) lbz 8,3(3) xori 9,9,0x1 lbzx 9,10,9 xor 9,9,6 rlwinm 9,9,0,0xff <= not necessary lbzx 9,10,9 xor 9,9,7 rlwinm 9,9,0,0xff <= not necessary lbzx 9,10,9 xor 9,9,8 rlwinm 9,9,0,0xff <= not necessary lbzx 3,10,9 blr .long 0 .byte 0,9,0,0,0,0,0,0 hash2(unsigned char const*): .LCF1: addi 2,2,.TOC.-.LCF1@l lbz 7,0(3) lbz 8,1(3) lbz 10,2(3) lbz 6,3(3) addis 9,2,.LC1@toc@ha ld 9,.LC1@toc@l(9) xori 7,7,0x1 lbzx 7,9,7 xor 8,8,7 lbzx 8,9,8 xor 10,10,8 lbzx 10,9,10 xor 10,6,10 lbzx 3,9,10 rldicl 3,3,0,32 blr Tiny sample: unsigned long long tiny(const unsigned char *inp) { return inp[0] ^ inp[1]; } tiny(unsigned char const*): lbz 9,0(3) lbz 10,1(3) xor 3,9,10 rlwinm 3,3,0,0xff blr .long 0 .byte 0,9,0,0,0,0,0,0 unsigned long long tiny2(const unsigned char *inp) { unsigned long long a = inp[0]; unsigned long long b = inp[1]; return a ^ b; } tiny2(unsigned char const*): lbz 9,0(3) lbz 10,1(3) xor 3,9,10 rlwinm 3,3,0,0xff blr .long 0 .byte 0,9,0,0,0,0,0,0 lbz/lbzx produces a value in the range 0 <= x < 256. The xor of two such values stays within that range, so masking the result with rlwinm ...,0,0xff is unnecessary.