https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102512
Bug ID: 102512
Summary: Redundant max/min operation for vector reduction
Product: gcc
Version: 12.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: crazylht at gmail dot com
Target Milestone: ---
Host: x86_64-pc-linux-gnu
Target: x86_64-*-* i?86-*-*

cat test.c

#define MAX(a, b) ((a) > (b) ? (a) : (b))

short
foo1 (short* p)
{
  short max = p[0];
  for (int i = 0; i != 8; i++)
    max = MAX(max, p[i]);
  return max;
}

short
foo2 (short* p)
{
  short max = p[0];
  for (int i = 1; i != 8; i++)
    max = MAX(max, p[i]);
  return max;
}

gcc -O3 -mavx2 -S

In foo1, the first MAX_EXPR <_10, vect__4.7_13> is redundant since it is
already covered by the later .REDUC_MAX.
In foo2, the vectorizer fails to recognize the .REDUC_MAX pattern.

;; Function foo1 (foo1, funcdef_no=0, decl_uid=2991, cgraph_uid=1, symbol_order=0)

.248t.optimized

short int foo1 (short int * p)
{
  vector(8) short int vect_max_11.8;
  vector(8) short int vect__4.7;
  short int max;
  vector(8) short int _10;
  short int _20;

  <bb 2> [local count: 119292720]:
  max_9 = *p_8(D);
  _10 = {max_9, max_9, max_9, max_9, max_9, max_9, max_9, max_9};
  vect__4.7_13 = MEM <vector(8) short int> [(short int *)p_8(D)];
  vect_max_11.8_14 = MAX_EXPR <_10, vect__4.7_13>;
  _20 = .REDUC_MAX (vect_max_11.8_14); [tail call]
  return _20;

}

;; Function foo2 (foo2, funcdef_no=1, decl_uid=3000, cgraph_uid=2, symbol_order=1)

short int foo2 (short int * p)
{
  short int stmp_max_11.21;
  vector(4) short int vect_max_11.20;
  vector(4) short int vect__4.19;
  short int max;
  short int _4;
  short int _25;
  vector(4) short int _30;
  short int _34;
  vector(4) short int _38;
  vector(4) short int _39;
  vector(4) short int _40;
  vector(4) short int _41;
  short int _44;
  short int _46;

  <bb 2> [local count: 268435454]:
  max_9 = *p_8(D);
  _30 = {max_9, max_9, max_9, max_9};
  vect__4.19_35 = MEM <vector(4) short int> [(short int *)p_8(D) + 2B];
  vect_max_11.20_36 = MAX_EXPR <_30, vect__4.19_35>;
  _38 = VEC_PERM_EXPR <vect_max_11.20_36, { 0, 0, 0, 0 }, { 2, 3, 4, 5 }>;
  _39 = MAX_EXPR <vect_max_11.20_36, _38>;
  _40 = VEC_PERM_EXPR <_39, { 0, 0, 0, 0 }, { 1, 2, 3, 4 }>;
  _41 = MAX_EXPR <_39, _40>;
  stmp_max_11.21_42 = BIT_FIELD_REF <_41, 16, 0>;
  _4 = MEM[(short int *)p_8(D) + 10B];
  _46 = MEM[(short int *)p_8(D) + 12B];
  _34 = MAX_EXPR <_4, _46>;
  _25 = MEM[(short int *)p_8(D) + 14B];
  _44 = MAX_EXPR <_25, stmp_max_11.21_42>;
  max_26 = MAX_EXPR <_34, _44>;
  return max_26;

}