On 07/09/15 23:10, Michael Matz wrote: > Hi, > > On Mon, 7 Sep 2015, Kugan wrote: > >> For the following testcase (compiling with -O1; -O2 works fine), we have >> a stmt with stm_code SSA_NAME (_7 = _ 6) and for which _6 is defined by >> a GIMPLE_CALL. In this case, we are using wrong SUNREG promoted mode >> resulting in wrong code. > > And why is that? > >> Simple SSA_NAME copes are generally optimized >> but when they are not, we can end up using the wrong promoted mode. >> Attached patch fixes when we have one copy. > > I think it's the wrong place to fixing up. Where does the wrong use come > from? At that place it should be fixed, not after the fact. > >> _6 = bar5 (-10); >> ... >> _7 = _6; >> _3 = (long unsigned int) _6; >> ... >> if (_3 != l5.0_4) > > There is no use of '_7' in this snippet so I don't see the relevance of > SUBREG_PROMOTED_MODE on it. > > But whatever you do, please make sure you include the testcase for the > problem as a regression test: >
Thanks for the review. This happens in ARM where definition of PROMOTED_MODE also changes the sign. I am attaching the cfgdump for the test-case. This is part of the existing test-case thats why I didn't include it as part of this patch. for ;; _7 = _6; (subreg:HI (reg:SI 113) 0) <ssa_name 0x7fd672c3ad38 type <integer_type 0x7fd672c36540 short int HI size <integer_cst 0x7fd672c430c0 constant 16> unit size <integer_cst 0x7fd672c430d8 constant 2> align 16 symtab 0 alias set -1 canonical type 0x7fd672c36540 precision 16 min <integer_cst 0x7fd672c43078 -32768> max <integer_cst 0x7fd672c43090 32767>> def_stmt _7 = _6; version 7> decl_rtl -> (reg:SI 113) temp -> (subreg:HI (reg:SI 113) 0) Unsignedp = 1 and we expand it to: ;; _7 = _6; (insn 10 9 0 (set (reg:SI 113) (zero_extend:SI (subreg/u:HI (reg:SI 113) 0))) -1 (nil)) but: short int _6; short int _7; insn 10 above is wrong. _6 is defined by a call and therefore the sign change in promoted mode is not true. We should probably rearrange/or add a copy propagation to remove this unnecessary copy but still this looks wrong to me. Thanks, Kugan
;; Function foo5 (foo5, funcdef_no=0, decl_uid=4147, cgraph_uid=0, symbol_order=0) foo5 (int x) { unsigned int _2; unsigned int _4; short unsigned int _5; ;; basic block 2, loop depth 0 ;; pred: ENTRY _4 = (unsigned int) x_1(D); _2 = _4 & 65535; _5 = (short unsigned int) x_1(D); return _5; ;; succ: EXIT } Partition map Partition 1 (x_1(D) - 1 ) Partition 2 (_2 - 2 ) Partition 4 (_4 - 4 ) Partition 5 (_5 - 5 ) Coalescible Partition map Partition 0, base 0 (x_1(D) - 1 ) Partition map Partition 0 (x_1(D) - 1 ) Conflict graph: After sorting: Coalesce List: Partition map Partition 0 (x_1(D) - 1 ) After Coalescing: Partition map Partition 0 (x_1(D) - 1 ) Partition 1 (_2 - 2 ) Partition 2 (_4 - 4 ) Partition 3 (_5 - 5 ) Replacing Expressions _4 replace with --> _4 = (unsigned int) x_1(D); _5 replace with --> _5 = (short unsigned int) x_1(D); foo5 (int x) { unsigned int _2; unsigned int _4; short unsigned int _5; ;; basic block 2, loop depth 0 ;; pred: ENTRY _4 = (unsigned int) x_1(D); _2 = _4 & 65535; _5 = (short unsigned int) x_1(D); return _5; ;; succ: EXIT } ;; Generating RTL for gimple basic block 2 (const_int 65535 [0xffff]) Hot cost: 4 (final) (const_int 65535 [0xffff]) Hot cost: 4 (final) ;; _2 = _4 & 65535; (insn 6 5 0 (set (reg:SI 111) (zero_extend:SI (subreg:HI (reg/v:SI 110 [ x ]) 0))) -1 (nil)) ;; return _5; (insn 7 6 8 (set (reg:HI 115) (subreg:HI (reg/v:SI 110 [ x ]) 0)) pr39240.c:6 -1 (nil)) (insn 8 7 9 (set (reg:SI 116) (zero_extend:SI (reg:HI 115))) pr39240.c:6 -1 (nil)) (insn 9 8 10 (set (reg:SI 114 [ <retval> ]) (reg:SI 116)) pr39240.c:6 -1 (nil)) (jump_insn 10 9 11 (set (pc) (label_ref 0)) pr39240.c:6 -1 (nil)) (barrier 11 10 0) try_optimize_cfg iteration 1 Merging block 3 into block 2... Merged blocks 2 and 3. Merged 2 and 3 without moving. Removing jump 10. Merging block 4 into block 2... Merged blocks 2 and 4. Merged 2 and 4 without moving. try_optimize_cfg iteration 2 fix_loop_structure: fixing up loops for function ;; ;; Full RTL generated for this function: ;; (note 1 0 4 NOTE_INSN_DELETED) ;; basic block 2, loop depth 0, count 0, freq 10000, maybe hot ;; prev block 0, next block 1, flags: (NEW, REACHABLE, RTL) ;; pred: ENTRY [100.0%] (FALLTHRU) (note 4 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) (insn 2 4 3 2 (set (reg/v:SI 110 [ x ]) (reg:SI 0 r0 [ x ])) pr39240.c:5 -1 (nil)) (note 3 2 6 2 NOTE_INSN_FUNCTION_BEG) (insn 6 3 7 2 (set (reg:SI 111) (zero_extend:SI (subreg:HI (reg/v:SI 110 [ x ]) 0))) -1 (nil)) (insn 7 6 8 2 (set (reg:HI 115) (subreg:HI (reg/v:SI 110 [ x ]) 0)) pr39240.c:6 -1 (nil)) (insn 8 7 9 2 (set (reg:SI 116) (zero_extend:SI (reg:HI 115))) pr39240.c:6 -1 (nil)) (insn 9 8 13 2 (set (reg:SI 114 [ <retval> ]) (reg:SI 116)) pr39240.c:6 -1 (nil)) (insn 13 9 14 2 (set (reg/i:SI 0 r0) (reg:SI 114 [ <retval> ])) pr39240.c:7 -1 (nil)) (insn 14 13 0 2 (use (reg/i:SI 0 r0)) pr39240.c:7 -1 (nil)) ;; succ: EXIT [100.0%] (FALLTHRU) ;; Function bar5 (bar5, funcdef_no=1, decl_uid=4150, cgraph_uid=1, symbol_order=1) bar5 (int x) { int _2; unsigned int _4; int _5; short unsigned int _6; int _7; short int _8; ;; basic block 2, loop depth 0 ;; pred: ENTRY _2 = x_1(D) + 6; _6 = foo5 (_2); _4 = (unsigned int) _6; _7 = (int) _6; _5 = (_7) sext from bit (16); _8 = (short int) _5; return _8; ;; succ: EXIT } Partition map Partition 1 (x_1(D) - 1 ) Partition 2 (_2 - 2 ) Partition 4 (_4 - 4 ) Partition 5 (_5 - 5 ) Partition 6 (_6 - 6 ) Partition 7 (_7 - 7 ) Partition 8 (_8 - 8 ) Coalescible Partition map Partition 0, base 0 (x_1(D) - 1 ) Partition map Partition 0 (x_1(D) - 1 ) Conflict graph: After sorting: Coalesce List: Partition map Partition 0 (x_1(D) - 1 ) After Coalescing: Partition map Partition 0 (x_1(D) - 1 ) Partition 1 (_2 - 2 ) Partition 2 (_4 - 4 ) Partition 3 (_5 - 5 ) Partition 4 (_6 - 6 ) Partition 5 (_7 - 7 ) Partition 6 (_8 - 8 ) Replacing Expressions _2 replace with --> _2 = x_1(D) + 6; _5 replace with --> _5 = (_7) sext from bit (16); _7 replace with --> _7 = (int) _6; _8 replace with --> _8 = (short int) _5; bar5 (int x) { int _2; unsigned int _4; int _5; short unsigned int _6; int _7; short int _8; ;; basic block 2, loop depth 0 ;; pred: ENTRY _2 = x_1(D) + 6; _6 = foo5 (_2); _4 = (unsigned int) _6; _7 = (int) _6; _5 = (_7) sext from bit (16); _8 = (short int) _5; return _8; ;; succ: EXIT } ;; Generating RTL for gimple basic block 2 (const_int 6 [0x6]) Hot cost: 4 (final) (const_int 6 [0x6]) Hot cost: 4 (final) ;; _6 = foo5 (_2); (insn 6 5 7 (set (reg:SI 118) (plus:SI (reg/v:SI 110 [ x ]) (const_int 6 [0x6]))) pr39240.c:12 -1 (nil)) (insn 7 6 8 (set (reg:SI 0 r0) (reg:SI 118)) pr39240.c:12 -1 (nil)) (call_insn/u 8 7 9 (parallel [ (set (reg:SI 0 r0) (call (mem:SI (symbol_ref:SI ("foo5") [flags 0x3] <function_decl 0x7f3734ef7300 foo5>) [0 foo5 S4 A32]) (const_int 0 [0]))) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:12 -1 (expr_list:REG_EH_REGION (const_int 0 [0]) (nil)) (expr_list (clobber (reg:SI 12 ip)) (expr_list:SI (use (reg:SI 0 r0)) (nil)))) (insn 9 8 10 (set (reg:SI 119) (reg:SI 0 r0)) pr39240.c:12 -1 (nil)) (insn 10 9 0 (set (reg:SI 114) (reg:SI 119)) pr39240.c:12 -1 (nil)) ;; _4 = (unsigned int) _6; (insn 11 10 0 (set (reg:SI 112) (reg:SI 114)) -1 (nil)) ;; return _8; (insn 12 11 13 (set (reg:SI 121) (sign_extend:SI (subreg:HI (reg:SI 114) 0))) pr39240.c:12 -1 (nil)) (insn 13 12 14 (set (reg:HI 120) (subreg:HI (reg:SI 121) 0)) pr39240.c:12 -1 (nil)) (insn 14 13 15 (set (reg:SI 122) (sign_extend:SI (reg:HI 120))) pr39240.c:12 -1 (nil)) (insn 15 14 16 (set (reg:SI 117 [ <retval> ]) (reg:SI 122)) pr39240.c:12 -1 (nil)) (jump_insn 16 15 17 (set (pc) (label_ref 0)) pr39240.c:12 -1 (nil)) (barrier 17 16 0) try_optimize_cfg iteration 1 Merging block 3 into block 2... Merged blocks 2 and 3. Merged 2 and 3 without moving. Removing jump 16. Merging block 4 into block 2... Merged blocks 2 and 4. Merged 2 and 4 without moving. try_optimize_cfg iteration 2 fix_loop_structure: fixing up loops for function ;; ;; Full RTL generated for this function: ;; (note 1 0 4 NOTE_INSN_DELETED) ;; basic block 2, loop depth 0, count 0, freq 10000, maybe hot ;; prev block 0, next block 1, flags: (NEW, REACHABLE, RTL) ;; pred: ENTRY [100.0%] (FALLTHRU) (note 4 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) (insn 2 4 3 2 (set (reg/v:SI 110 [ x ]) (reg:SI 0 r0 [ x ])) pr39240.c:11 -1 (nil)) (note 3 2 6 2 NOTE_INSN_FUNCTION_BEG) (insn 6 3 7 2 (set (reg:SI 118) (plus:SI (reg/v:SI 110 [ x ]) (const_int 6 [0x6]))) pr39240.c:12 -1 (nil)) (insn 7 6 8 2 (set (reg:SI 0 r0) (reg:SI 118)) pr39240.c:12 -1 (nil)) (call_insn/u 8 7 9 2 (parallel [ (set (reg:SI 0 r0) (call (mem:SI (symbol_ref:SI ("foo5") [flags 0x3] <function_decl 0x7f3734ef7300 foo5>) [0 foo5 S4 A32]) (const_int 0 [0]))) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:12 -1 (expr_list:REG_EH_REGION (const_int 0 [0]) (nil)) (expr_list (clobber (reg:SI 12 ip)) (expr_list:SI (use (reg:SI 0 r0)) (nil)))) (insn 9 8 10 2 (set (reg:SI 119) (reg:SI 0 r0)) pr39240.c:12 -1 (nil)) (insn 10 9 11 2 (set (reg:SI 114) (reg:SI 119)) pr39240.c:12 -1 (nil)) (insn 11 10 12 2 (set (reg:SI 112) (reg:SI 114)) -1 (nil)) (insn 12 11 13 2 (set (reg:SI 121) (sign_extend:SI (subreg:HI (reg:SI 114) 0))) pr39240.c:12 -1 (nil)) (insn 13 12 14 2 (set (reg:HI 120) (subreg:HI (reg:SI 121) 0)) pr39240.c:12 -1 (nil)) (insn 14 13 15 2 (set (reg:SI 122) (sign_extend:SI (reg:HI 120))) pr39240.c:12 -1 (nil)) (insn 15 14 19 2 (set (reg:SI 117 [ <retval> ]) (reg:SI 122)) pr39240.c:12 -1 (nil)) (insn 19 15 20 2 (set (reg/i:SI 0 r0) (reg:SI 117 [ <retval> ])) pr39240.c:13 -1 (nil)) (insn 20 19 0 2 (use (reg/i:SI 0 r0)) pr39240.c:13 -1 (nil)) ;; succ: EXIT [100.0%] (FALLTHRU) ;; Function main (main, funcdef_no=2, decl_uid=4154, cgraph_uid=2, symbol_order=3) (executed once) main () { int _2; long unsigned int _3; long unsigned int l5.0_4; short int _6; short int _7; ;; basic block 2, loop depth 0 ;; pred: ENTRY _6 = bar5 (-10); _2 = (int) _6; _7 = _6; _3 = (long unsigned int) _6; l5.0_4 = l5; if (_3 != l5.0_4) goto <bb 3>; else goto <bb 4>; ;; succ: 3 ;; 4 ;; basic block 3, loop depth 0 ;; pred: 2 abort (); ;; succ: ;; basic block 4, loop depth 0 ;; pred: 2 return 0; ;; succ: EXIT } Partition map Partition 2 (_2 - 2 ) Partition 3 (_3 - 3 ) Partition 4 (l5.0_4 - 4 ) Partition 6 (_6 - 6 ) Partition 7 (_7 - 7 ) Coalescible Partition map Partition 0, base 0 (_6 - 6 7 ) Partition map Partition 0 (_6 - 6 ) Partition 1 (_7 - 7 ) Conflict graph: After sorting: Sorted Coalesce list: (10000) _6 <-> _7 Partition map Partition 0 (_6 - 6 ) Partition 1 (_7 - 7 ) Coalesce list: (6)_6 & (7)_7 [map: 0, 1] : Success -> 0 After Coalescing: Partition map Partition 0 (_2 - 2 ) Partition 1 (_3 - 3 ) Partition 2 (l5.0_4 - 4 ) Partition 3 (_6 - 6 7 ) Replacing Expressions _3 replace with --> _3 = (long unsigned int) _6; l5.0_4 replace with --> l5.0_4 = l5; main () { int _2; long unsigned int _3; long unsigned int l5.0_4; short int _6; short int _7; ;; basic block 2, loop depth 0 ;; pred: ENTRY _6 = bar5 (-10); _2 = (int) _6; _7 = _6; _3 = (long unsigned int) _6; l5.0_4 = l5; if (_3 != l5.0_4) goto <bb 3>; else goto <bb 4>; ;; succ: 3 ;; 4 ;; basic block 3, loop depth 0 ;; pred: 2 abort (); ;; succ: ;; basic block 4, loop depth 0 ;; pred: 2 return 0; ;; succ: EXIT } ;; Generating RTL for gimple basic block 2 (const_int -10 [0xfffffffffffffff6]) Hot cost: 4 (final) ;; _6 = bar5 (-10); (insn 5 4 6 (set (reg:SI 0 r0) (const_int -10 [0xfffffffffffffff6])) pr39240.c:20 -1 (nil)) (call_insn/u 6 5 7 (parallel [ (set (reg:SI 0 r0) (call (mem:SI (symbol_ref:SI ("bar5") [flags 0x3] <function_decl 0x7f3734ef7500 bar5>) [0 bar5 S4 A32]) (const_int 0 [0]))) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:20 -1 (expr_list:REG_EH_REGION (const_int 0 [0]) (nil)) (expr_list (clobber (reg:SI 12 ip)) (expr_list:SI (use (reg:SI 0 r0)) (nil)))) (insn 7 6 8 (set (reg:SI 115) (reg:SI 0 r0)) pr39240.c:20 -1 (nil)) (insn 8 7 0 (set (reg:SI 113) (reg:SI 115)) pr39240.c:20 -1 (nil)) ;; _2 = (int) _6; (insn 9 8 0 (set (reg:SI 110) (reg:SI 113)) -1 (nil)) ;; _7 = _6; (insn 10 9 0 (set (reg:SI 113) (zero_extend:SI (subreg/u:HI (reg:SI 113) 0))) -1 (nil)) ;; if (_3 != l5.0_4) (insn 11 10 12 (set (reg/f:SI 116) (symbol_ref:SI ("*.LANCHOR0") [flags 0x182])) pr39240.c:20 -1 (nil)) (insn 12 11 13 (set (reg:SI 117) (mem/c:SI (reg/f:SI 116) [0 l5+0 S4 A32])) pr39240.c:20 -1 (nil)) (insn 13 12 14 (set (reg:CC 100 cc) (compare:CC (reg:SI 113) (reg:SI 117))) pr39240.c:20 -1 (nil)) (jump_insn 14 13 0 (set (pc) (if_then_else (eq (reg:CC 100 cc) (const_int 0 [0])) (label_ref 0) (pc))) pr39240.c:20 -1 (int_list:REG_BR_PROB 9996 (nil))) ;; Generating RTL for gimple basic block 3 ;; abort (); (call_insn 16 15 17 (parallel [ (call (mem:SI (symbol_ref:SI ("abort") [flags 0x41] <function_decl 0x7f373511b400 abort>) [0 __builtin_abort S4 A32]) (const_int 0 [0])) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:21 -1 (expr_list:REG_NORETURN (const_int 0 [0]) (expr_list:REG_EH_REGION (const_int 0 [0]) (nil))) (expr_list (clobber (reg:SI 12 ip)) (nil))) (barrier 17 16 0) ;; Generating RTL for gimple basic block 4 ;; (code_label 18 17 19 5 "" [0 uses]) (note 19 18 0 NOTE_INSN_BASIC_BLOCK) ;; return 0; (insn 20 19 21 (set (reg:SI 114 [ <retval> ]) (const_int 0 [0])) -1 (nil)) (jump_insn 21 20 22 (set (pc) (label_ref 0)) -1 (nil)) (barrier 22 21 0) try_optimize_cfg iteration 1 Merging block 3 into block 2... Merged blocks 2 and 3. Merged 2 and 3 without moving. Removing jump 21. Merging block 6 into block 5... Merged blocks 5 and 6. Merged 5 and 6 without moving. try_optimize_cfg iteration 2 fix_loop_structure: fixing up loops for function ;; ;; Full RTL generated for this function: ;; (note 1 0 3 NOTE_INSN_DELETED) ;; basic block 2, loop depth 0, count 0, freq 10000, maybe hot ;; prev block 0, next block 4, flags: (NEW, REACHABLE, RTL) ;; pred: ENTRY [100.0%] (FALLTHRU) (note 3 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG) (insn 5 2 6 2 (set (reg:SI 0 r0) (const_int -10 [0xfffffffffffffff6])) pr39240.c:20 -1 (nil)) (call_insn/u 6 5 7 2 (parallel [ (set (reg:SI 0 r0) (call (mem:SI (symbol_ref:SI ("bar5") [flags 0x3] <function_decl 0x7f3734ef7500 bar5>) [0 bar5 S4 A32]) (const_int 0 [0]))) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:20 -1 (expr_list:REG_EH_REGION (const_int 0 [0]) (nil)) (expr_list (clobber (reg:SI 12 ip)) (expr_list:SI (use (reg:SI 0 r0)) (nil)))) (insn 7 6 8 2 (set (reg:SI 115) (reg:SI 0 r0)) pr39240.c:20 -1 (nil)) (insn 8 7 9 2 (set (reg:SI 113) (reg:SI 115)) pr39240.c:20 -1 (nil)) (insn 9 8 10 2 (set (reg:SI 110) (reg:SI 113)) -1 (nil)) (insn 10 9 11 2 (set (reg:SI 113) (zero_extend:SI (subreg/u:HI (reg:SI 113) 0))) -1 (nil)) (insn 11 10 12 2 (set (reg/f:SI 116) (symbol_ref:SI ("*.LANCHOR0") [flags 0x182])) pr39240.c:20 -1 (nil)) (insn 12 11 13 2 (set (reg:SI 117) (mem/c:SI (reg/f:SI 116) [0 l5+0 S4 A32])) pr39240.c:20 -1 (nil)) (insn 13 12 14 2 (set (reg:CC 100 cc) (compare:CC (reg:SI 113) (reg:SI 117))) pr39240.c:20 -1 (nil)) (jump_insn 14 13 15 2 (set (pc) (if_then_else (eq (reg:CC 100 cc) (const_int 0 [0])) (label_ref 18) (pc))) pr39240.c:20 -1 (int_list:REG_BR_PROB 9996 (nil)) -> 18) ;; succ: 4 [0.0%] (FALLTHRU) ;; 5 [100.0%] ;; basic block 4, loop depth 0, count 0, freq 4 ;; prev block 2, next block 5, flags: (NEW, REACHABLE, RTL) ;; pred: 2 [0.0%] (FALLTHRU) (note 15 14 16 4 [bb 4] NOTE_INSN_BASIC_BLOCK) (call_insn 16 15 17 4 (parallel [ (call (mem:SI (symbol_ref:SI ("abort") [flags 0x41] <function_decl 0x7f373511b400 abort>) [0 __builtin_abort S4 A32]) (const_int 0 [0])) (use (const_int 0 [0])) (clobber (reg:SI 14 lr)) ]) pr39240.c:21 -1 (expr_list:REG_NORETURN (const_int 0 [0]) (expr_list:REG_EH_REGION (const_int 0 [0]) (nil))) (expr_list (clobber (reg:SI 12 ip)) (nil))) ;; succ: (barrier 17 16 18) ;; basic block 5, loop depth 0, count 0, freq 9996, maybe hot ;; prev block 4, next block 1, flags: (NEW, REACHABLE, RTL) ;; pred: 2 [100.0%] (code_label 18 17 19 5 5 "" [1 uses]) (note 19 18 20 5 [bb 5] NOTE_INSN_BASIC_BLOCK) (insn 20 19 24 5 (set (reg:SI 114 [ <retval> ]) (const_int 0 [0])) -1 (nil)) (insn 24 20 25 5 (set (reg/i:SI 0 r0) (reg:SI 114 [ <retval> ])) pr39240.c:23 -1 (nil)) (insn 25 24 0 5 (use (reg/i:SI 0 r0)) pr39240.c:23 -1 (nil)) ;; succ: EXIT [100.0%] (FALLTHRU)
extern void abort (void); __attribute__ ((noinline)) static unsigned short int foo5 (int x) { return x; } __attribute__ ((noinline)) short int bar5 (int x) { return foo5 (x + 6); } unsigned long l5 = (short int) -4; int main (void) { if (bar5 (-10) != l5) abort (); return 0; }