Hi: Bootstrapped and regtested on x86_64-linux-gnu{-m32,} Ok for trunk?
gcc/ChangeLog: PR rtl-optimization/101796 * simplify-rtx.c (simplify_context::simplify_binary_operation_1): Simplify vector shift/rotate with const_vec_duplicate to vector shift/rotate with const_int element. gcc/testsuite/ChangeLog: PR rtl-optimization/101796 * gcc.target/i386/pr101796.c: New test. --- gcc/simplify-rtx.c | 15 ++++++ gcc/testsuite/gcc.target/i386/pr101796.c | 65 ++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr101796.c diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index a719f57870f..75f3e455562 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -3970,6 +3970,21 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, return simplify_gen_binary (code, mode, op0, gen_int_shift_amount (mode, val)); } + + /* Optimize vector shift/rotate with const_vec_duplicate + to vector shift/rotate with const_int element. + /* TODO: vec_duplicate with variable can also be simplified, + but GCC only require operand 2 of shift/rotate to be a scalar type + which can have different modes in different backends, it makes + simplication difficult to decide which mode should be choosed + for shift/rotate count. */ + if ((code == ASHIFTRT || code == LSHIFTRT + || code == ASHIFT || code == ROTATERT + || code == ROTATE) + && const_vec_duplicate_p (op1)) + return simplify_gen_binary (code, mode, op0, + unwrap_const_vec_duplicate (op1)); + break; case ASHIFT: diff --git a/gcc/testsuite/gcc.target/i386/pr101796.c b/gcc/testsuite/gcc.target/i386/pr101796.c new file mode 100644 index 00000000000..c22d6267fe5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101796.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2 " } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ +/* { dg-final { scan-assembler-not "vpsrlv\[dwq\]" } } */ +/* { dg-final { scan-assembler-not "vpsllv\[dwq\]" } } */ +/* { dg-final { scan-assembler-not "vpsrav\[dwq\]" } } */ +/* { dg-final { scan-assembler-times "vpsrl\[dwq\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpsll\[dwq\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpsra\[dwq\]" 3 } } */ + +#include <immintrin.h> + +__m512i +foo (__m512i a) +{ + return _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3)); +} + +__m512i +foo1 (__m512i a) +{ + return _mm512_srlv_epi32 (a, _mm512_set1_epi32 (3)); +} + +__m512i +foo2 (__m512i a, long long b) +{ + return _mm512_srlv_epi64 (a, _mm512_set1_epi64 (3)); +} + +__m512i +foo3 (__m512i a) +{ + return _mm512_srav_epi16 (a, _mm512_set1_epi16 (3)); +} + +__m512i +foo4 (__m512i a) +{ + return _mm512_srav_epi32 (a, _mm512_set1_epi32 (3)); +} + +__m512i +foo5 (__m512i a, long long b) +{ + return _mm512_srav_epi64 (a, _mm512_set1_epi64 (3)); +} + +__m512i +foo6 (__m512i a) +{ + return _mm512_sllv_epi16 (a, _mm512_set1_epi16 (3)); +} + +__m512i +foo7 (__m512i a) +{ + return _mm512_sllv_epi32 (a, _mm512_set1_epi32 (3)); +} + +__m512i +foo8 (__m512i a, long long b) +{ + return _mm512_sllv_epi64 (a, _mm512_set1_epi64 (3)); +} -- 2.27.0