On Wed, Apr 19, 2023 at 9:54 AM Hongtao Liu <crazy...@gmail.com> wrote: > > On Tue, Apr 18, 2023 at 3:18 PM Haochen Jiang via Gcc-patches > <gcc-patches@gcc.gnu.org> wrote: > > > > Hi all, > > > > Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is > > under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL > > and VPCLMULQDQ, the 128 bit intrin is not available when we just use > > compiler flag -mvpclmulqdq. But it should be, according to the Intel SDM. > > > > Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to > > add dependency between them. > > > > Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so > > VPCLMULQDQ should imply AVX. > > > > Tested on x86_64-pc-linux-gnu. Ok for trunk? > > > > BRs, > > Haochen > > > > gcc/ChangeLog: > > > > * common/config/i386/i386-common.cc > > (OPTION_MASK_ISA_VPCLMULQDQ_SET): > > Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET. > > (OPTION_MASK_ISA_AVX_UNSET): > > Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET. > > (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto. > > * config/i386/i386.md (vpclmulqdqvl): New. > > * config/i386/sse.md (pclmulqdq): Add evex encoding. > > * config/i386/vpclmulqdqintrin.h: Remove redundant avx target > > push. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm. > > --- > > gcc/common/config/i386/i386-common.cc | 9 ++++++--- > > gcc/config/i386/i386.md | 4 +++- > > gcc/config/i386/sse.md | 11 ++++++----- > > gcc/config/i386/vpclmulqdqintrin.h | 4 ++-- > > gcc/testsuite/gcc.target/i386/vpclmulqdq.c | 3 +++ > > 5 files changed, 20 insertions(+), 11 deletions(-) > > > > diff --git a/gcc/common/config/i386/i386-common.cc > > b/gcc/common/config/i386/i386-common.cc > > index 315db854862..c7954da8e34 100644 > > --- a/gcc/common/config/i386/i386-common.cc > > +++ b/gcc/common/config/i386/i386-common.cc > > @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3. 
If not see > > #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI > > #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK > > #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES > > -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ > > +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \ > > + (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \ > > + | OPTION_MASK_ISA_AVX_SET) > > #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI > > #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B > > #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG > > @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3. If not see > > #define OPTION_MASK_ISA_AVX_UNSET \ > > (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \ > > | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \ > > - | OPTION_MASK_ISA_AVX2_UNSET ) > > + | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET) > > #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA > > #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR > > #define OPTION_MASK_ISA_XSAVE_UNSET \ > > @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3. 
If not see > > > > #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES > > #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA > > -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL > > +#define OPTION_MASK_ISA_PCLMUL_UNSET \ > > + (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET) > > #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM > > #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG > > #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > > index ed689b044c3..acc994226e7 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -841,7 +841,7 @@ > > > > avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, > > avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, > > > > avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, > > - avx512ifmavl,avxneconvert,avx512bf16vl" > > + avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl" > > (const_string "base")) > > > > ;; Define instruction set of MMX instructions > > @@ -903,6 +903,8 @@ > > (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT") > > (eq_attr "isa" "avx512bf16vl") > > (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL") > > + (eq_attr "isa" "vpclmulqdqvl") > > + (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL") > > > > (eq_attr "mmx_isa" "native") > > (symbol_ref "!TARGET_MMX_WITH_SSE") > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index 26812ab6106..33e281901cf 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -25195,20 +25195,21 @@ > > (set_attr "mode" "TI")]) > > > > (define_insn "pclmulqdq" > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm") > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > + (unspec:V2DI [(match_operand:V2DI 1 
"register_operand" "0,x,v") > > + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm") > Just change x to Yv instead of introducing a new alternative. Looks like the EVEX encoding needs both vpclmulqdq and avx512vl, so I take back the comment above; the patch LGTM. > Others LGTM. > > (match_operand:SI 3 "const_0_to_255_operand")] > > UNSPEC_PCLMUL))] > > "TARGET_PCLMUL" > > "@ > > pclmulqdq\t{%3, %2, %0|%0, %2, %3} > > + vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3} > > vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" > > - [(set_attr "isa" "noavx,avx") > > + [(set_attr "isa" "noavx,avx,vpclmulqdqvl") > > (set_attr "type" "sselog1") > > (set_attr "prefix_extra" "1") > > (set_attr "length_immediate" "1") > > - (set_attr "prefix" "orig,vex") > > + (set_attr "prefix" "orig,vex,evex") > > (set_attr "mode" "TI")]) > > > > (define_expand "avx_vzeroall" > > diff --git a/gcc/config/i386/vpclmulqdqintrin.h > > b/gcc/config/i386/vpclmulqdqintrin.h > > index ba93fc4ff9c..2c83b6037a0 100644 > > --- a/gcc/config/i386/vpclmulqdqintrin.h > > +++ b/gcc/config/i386/vpclmulqdqintrin.h > > @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const > > int __C) > > #pragma GCC pop_options > > #endif /* __DISABLE_VPCLMULQDQF__ */ > > > > -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__) > > +#if !defined(__VPCLMULQDQ__) > > #pragma GCC push_options > > -#pragma GCC target("vpclmulqdq,avx") > > +#pragma GCC target("vpclmulqdq") > > #define __DISABLE_VPCLMULQDQ__ > > #endif /* __VPCLMULQDQ__ */ > > > > diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c > > b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c > > index d93f776803f..27b2fd71ea4 100644 > > --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c > > +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c > > @@ -2,16 +2,19 @@ > > /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */ > > /* { dg-final { scan-assembler-times "vpclmulqdq\[ > > 
\\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ > > \\t\]+#)" 1 } } */ > > /* { dg-final { scan-assembler-times "vpclmulqdq\[ > > \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ > > \\t\]+#)" 1 } } */ > > +/* { dg-final { scan-assembler-times "vpclmulqdq\[ > > \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ > > \\t\]+#)" 1 } } */ > > > > #include <x86intrin.h> > > > > volatile __m512i x1, x2; > > volatile __m256i x3, x4; > > +volatile __m128i x5, x6; > > > > void extern > > avx512vl_test (void) > > { > > x1 = _mm512_clmulepi64_epi128(x1, x2, 3); > > x3 = _mm256_clmulepi64_epi128(x3, x4, 3); > > + x5 = _mm_clmulepi64_si128(x5, x6, 3); > > } > > > > -- > > 2.31.1 > > > > > -- > BR, > Hongtao
-- BR, Hongtao