On Thu, May 13, 2021 at 05:37:36PM +0200, Jakub Jelinek wrote: > So, do you want something like (I've deleted the old comment as I think > the new one is enough, but am open to keeping both) the patch below, where > if REG_CAN_CHANGE_MODE_P is false, we punt (return), otherwise call > set_value_regno? > I am not sure if those REG_CAN_CHANGE_MODE_P arguments are what you want > though.
Oops, missing !, meant following which works on 11 branch for the testcase: 2021-05-13 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/100342 * regcprop.c (copy_value): When copying a source reg in a wider mode than it has recorded for the value, adjust recorded destination mode too or punt if !REG_CAN_CHANGE_MODE_P. * gcc.target/i386/pr100342.c: New test. --- gcc/regcprop.c.jj 2021-03-23 10:21:07.176447920 +0100 +++ gcc/regcprop.c 2021-05-13 17:36:46.443192451 +0200 @@ -358,34 +358,25 @@ copy_value (rtx dest, rtx src, struct va else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) return; - /* It is not safe to link DEST into the chain if SRC was defined in some - narrower mode M and if M is also narrower than the mode of the first - register in the chain. For example: - (set (reg:DI r1) (reg:DI r0)) - (set (reg:HI r2) (reg:HI r1)) - (set (reg:SI r3) (reg:SI r2)) //Should be a new chain start at r3 - (set (reg:SI r4) (reg:SI r1)) - (set (reg:SI r5) (reg:SI r4)) - - the upper part of r3 is undefined. If we added it to the chain, - it may be used to replace r5, which has defined upper bits. - See PR98694 for details. - - [A] partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) - [B] partial_subreg_p (vd->e[sr].mode, vd->e[vd->e[sr].oldest_regno].mode) - Condition B is added to to catch optimization opportunities of - - (set (reg:HI R1) (reg:HI R0)) - (set (reg:SI R2) (reg:SI R1)) // [A] - (set (reg:DI R3) (reg:DI R2)) // [A] - (set (reg:SI R4) (reg:SI R[0-3])) - (set (reg:HI R5) (reg:HI R[0-4])) - - in which all registers have only 16 defined bits. */ - else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) - && partial_subreg_p (vd->e[sr].mode, - vd->e[vd->e[sr].oldest_regno].mode)) - return; + /* If a narrower value is copied using wider mode, the upper bits + are undefined (could be e.g. a former paradoxical subreg). Signal + in that case we've only copied value using the narrower mode. 
+ Consider: + (set (reg:DI r14) (mem:DI ...)) + (set (reg:QI si) (reg:QI r14)) + (set (reg:DI bp) (reg:DI r14)) + (set (reg:DI r14) (const_int ...)) + (set (reg:DI dx) (reg:DI si)) + (set (reg:DI si) (const_int ...)) + (set (reg:DI dx) (reg:DI bp)) + The last set is not redundant, while the low 8 bits of dx are already + equal to low 8 bits of bp, the other bits are undefined. */ + else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src))) + { + if (!REG_CAN_CHANGE_MODE_P (sr, GET_MODE (src), vd->e[sr].mode)) + return; + set_value_regno (dr, vd->e[sr].mode, vd); + } /* Link DR at the end of the value chain used by SR. */ --- gcc/testsuite/gcc.target/i386/pr100342.c.jj 2021-05-13 17:28:41.181460465 +0200 +++ gcc/testsuite/gcc.target/i386/pr100342.c 2021-05-13 17:28:41.181460465 +0200 @@ -0,0 +1,70 @@ +/* PR rtl-optimization/100342 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-O2 -fno-dse -fno-forward-propagate -Wno-psabi -mno-sse2" } */ + +#define SHL(x, y) ((x) << ((y) & (sizeof(x) * 8 - 1))) +#define SHR(x, y) ((x) >> ((y) & (sizeof(x) * 8 - 1))) +#define ROR(x, y) (SHR(x, y)) | (SHL(x, (sizeof(x) * 8 - (y)))) +#define SHLV(x, y) ((x) << ((y) & (sizeof((x)[0]) * 8 - 1))) +#define SHLSV(x, y) ((x) << ((y) & (sizeof((y)[0]) * 8 - 1))) +typedef unsigned char A; +typedef unsigned char __attribute__((__vector_size__ (8))) B; +typedef unsigned char __attribute__((__vector_size__ (16))) C; +typedef unsigned char __attribute__((__vector_size__ (32))) D; +typedef unsigned char __attribute__((__vector_size__ (64))) E; +typedef unsigned short F; +typedef unsigned short __attribute__((__vector_size__ (16))) G; +typedef unsigned int H; +typedef unsigned int __attribute__((__vector_size__ (32))) I; +typedef unsigned long long J; +typedef unsigned long long __attribute__((__vector_size__ (8))) K; +typedef unsigned long long __attribute__((__vector_size__ (32))) L; +typedef unsigned long long __attribute__((__vector_size__ (64))) M; +typedef unsigned __int128 N; 
+typedef unsigned __int128 __attribute__((__vector_size__ (16))) O; +typedef unsigned __int128 __attribute__((__vector_size__ (32))) P; +typedef unsigned __int128 __attribute__((__vector_size__ (64))) Q; +B v1; +D v2; +L v3; +K v4; +I v5; +O v6; + +B +foo (A a, C b, E c, F d, G e, H f, J g, M h, N i, P j, Q k) +{ + b &= (A) f; + k += a; + G l = e; + D m = v2 >= (A) (J) v1; + J r = a + g; + L n = v3 <= f; + k -= i / f; + l -= (A) g; + c |= (A) d; + b -= (A) i; + J o = ROR (__builtin_clz (r), a); + K p = v4 | f, q = v4 <= f; + P s = SHLV (SHLSV (__builtin_bswap64 (i), (P) (0 < j)) <= 0, j); + n += a <= r; + M t = (M) (a / SHLV (c, 0)) != __builtin_bswap64 (i); + I u = f - v5; + E v = (E) h + (E) t + (E) k; + D w = (union { D b[2]; }) { }.b[0] + ((union { E b; }) v).b[1] + m + (D) u + (D) n + (D) s; + C x = ((union { D b; }) w).b[1] + b + (C) l + (C) v6; + B y = ((union { C a; B b; }) x).b + ((union { C a; B b[2]; }) x).b[1] + (B) p + (B) q; + J z = i + o; + F z2 = z; + A z3 = z2; + return y + z3; +} + +int +main () +{ + B x = foo (0, (C) { }, (E) { }, 10, (G) { }, 4, 2, (M) { }, 123842323652213865LL, (P) { 1 }, (Q) { }); + if ((J) x != 0x2e2c2e2c2e2c2e30ULL) + __builtin_abort(); + return 0; +} Jakub