https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122686
--- Comment #17 from Haochen Jiang <haochen.jiang at intel dot com> ---
Before the patch series:
;; basic block 21, loop depth 1
;; pred: 20
_2569 = (unsigned int) _18;
_2570 = _2569 + 1;
_2571 = (int) _2570;
;; succ: 22
;; basic block 22, loop depth 2
;; pred: 22
;; 21
# k_699 = PHI <k_191(22), _19(21)>
# ivtmp.894_2547 = PHI <ivtmp.894_2548(22), _2554(21)>
# ivtmp.896_2558 = PHI <ivtmp.896_2559(22), ivtmp.904_2600(21)>
_1794 = (float *) ivtmp.894_2547;
_1788 = (float *) ivtmp.896_2558;
__builtin_memcpy (_1794, _1788, _1798);
k_191 = k_699 + 1;
ivtmp.894_2548 = _2546 + ivtmp.894_2547;
ivtmp.896_2559 = ivtmp.896_2558 + _2573;
if (k_191 == _2571)
goto <bb 23>; [11.00%]
else
goto <bb 22>; [89.00%]
;; Generating RTL for gimple basic block 21
;; _2571 = (int) _2570;
(insn 451 450 0 (parallel [
(set (reg:SI 1200 [ _2571 ])
(plus:SI (reg:SI 115 [ _18 ])
(const_int 1 [0x1])))
(clobber (reg:CC 17 flags))
]) -1
(nil))
;; Generating RTL for gimple basic block 22
Selected stringop expansion strategy: libcall
...
After the patch series:
;; basic block 17, loop depth 1
;; pred: 16
_2616 = ivtmp.906_2548 * 4;
ivtmp.896_2513 = _2615 + _2616;
_2522 = (unsigned int) _18;
_2523 = _2522 + 1;
_2524 = (int) _2523;
;; succ: 18
;; basic block 18, loop depth 2
;; pred: 18
;; 17
# k_660 = PHI <k_191(18), _19(17)>
# ivtmp.894_2500 = PHI <ivtmp.894_2501(18), _2507(17)>
# ivtmp.896_2511 = PHI <ivtmp.896_2512(18), ivtmp.896_2513(17)>
_1826 = (float *) ivtmp.894_2500;
_256 = (float *) ivtmp.896_2511;
__builtin_memcpy (_1826, _256, _1820);
k_191 = k_660 + 1;
ivtmp.894_2501 = _2499 + ivtmp.894_2500;
ivtmp.896_2512 = ivtmp.896_2511 + _2526;
if (k_191 == _2524)
goto <bb 19>; [11.00%]
else
goto <bb 18>; [89.00%]
Then we get stuck at:
;; Generating RTL for gimple basic block 17
Swap operands in stmt:
ivtmp.896_2513 = _2615 + _2616;
Cost left opnd=0, right opnd=1
Registering value_relation (_2546 pe64 _139) (bb10) at _2546 = (unsigned long) _139;
;; ivtmp.896_2513 = _2616 + _2615;
(insn 411 410 412 (parallel [
(set (reg:DI 1333 [ _2616 ])
(ashift:DI (reg:DI 1224 [ ivtmp.906 ])
(const_int 2 [0x2])))
(clobber (reg:CC 17 flags))
]) -1
(nil))
(insn 412 411 0 (parallel [
(set (reg:DI 1204 [ ivtmp.896 ])
(plus:DI (reg:DI 1333 [ _2616 ])
(reg:DI 1236 [ _2615 ])))
(clobber (reg:CC 17 flags))
]) -1
(nil))
;; _2524 = (int) _2523;
(insn 413 412 0 (parallel [
(set (reg:SI 1207 [ _2524 ])
(plus:SI (reg:SI 115 [ _18