Hi! We ICE on the following testcase now that IFN_C[LT]Z calls can have one or two arguments (where two arguments mean the call is well defined at zero). The following patch makes us create a child node only for the first argument, and compatible_calls_p ensures the other argument is the same, which at least according to the testcase seems sufficient because of the vect patterns.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2023-11-16 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/112536 * tree-vect-slp.cc (arg0_map): New variable. (vect_get_operand_map): For IFN_CLZ or IFN_CTZ, return arg0_map. * gcc.dg/pr112536.c: New test. --- gcc/tree-vect-slp.cc.jj 2023-11-11 08:52:20.896838494 +0100 +++ gcc/tree-vect-slp.cc 2023-11-15 10:30:57.606329777 +0100 @@ -505,6 +505,7 @@ static const int cond_expr_maps[3][5] = { 4, -2, -1, 1, 2 }, { 4, -1, -2, 2, 1 } }; +static const int arg0_map[] = { 1, 0 }; static const int arg1_map[] = { 1, 1 }; static const int arg2_map[] = { 1, 2 }; static const int arg1_arg4_map[] = { 2, 1, 4 }; @@ -580,6 +581,10 @@ vect_get_operand_map (const gimple *stmt return nullptr; } + case IFN_CLZ: + case IFN_CTZ: + return arg0_map; + default: break; } --- gcc/testsuite/gcc.dg/pr112536.c.jj 2023-11-15 10:37:44.316580909 +0100 +++ gcc/testsuite/gcc.dg/pr112536.c 2023-11-15 10:37:19.464932191 +0100 @@ -0,0 +1,58 @@ +/* PR tree-optimization/112536 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-ipa-icf" } */ +/* { dg-additional-options "-mlzcnt -mavx512cd -mavx512vl" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-times "\tvplzcntd\t" 3 { target { i?86-*-* x86_64-*-* } } } } */ + +unsigned a[12]; + +void +foo (void) +{ + int i = a[0]; + int j = a[1]; + int k = a[2]; + int l = a[3]; + int e = i ? __builtin_clz (i) : __SIZEOF_INT__ * __CHAR_BIT__; + int f = j ? __builtin_clz (j) : __SIZEOF_INT__ * __CHAR_BIT__; + int g = k ? __builtin_clz (k) : __SIZEOF_INT__ * __CHAR_BIT__; + int h = l ? __builtin_clz (l) : __SIZEOF_INT__ * __CHAR_BIT__; + a[0] = e; + a[1] = f; + a[2] = g; + a[3] = h; +} + +void +bar (void) +{ + int i = a[4]; + int j = a[5]; + int k = a[6]; + int l = a[7]; + int e = i ? __builtin_clz (i) : __SIZEOF_INT__ * __CHAR_BIT__; + int f = __builtin_clz (j); + int g = __builtin_clz (k); + int h = l ? 
__builtin_clz (l) : __SIZEOF_INT__ * __CHAR_BIT__; + a[4] = e; + a[5] = f; + a[6] = g; + a[7] = h; +} + +void +baz (void) +{ + int i = a[8]; + int j = a[9]; + int k = a[10]; + int l = a[11]; + int e = __builtin_clz (i); + int f = j ? __builtin_clz (j) : __SIZEOF_INT__ * __CHAR_BIT__; + int g = __builtin_clz (k); + int h = l ? __builtin_clz (l) : __SIZEOF_INT__ * __CHAR_BIT__; + a[8] = e; + a[9] = f; + a[10] = g; + a[11] = h; +} Jakub