On Wed, Apr 19, 2023 at 9:54 AM Hongtao Liu <crazy...@gmail.com> wrote:
>
> On Tue, Apr 18, 2023 at 3:18 PM Haochen Jiang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi all,
> >
> > Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is
> > under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL
> > and VPCLMULQDQ, The 128 bit intrin is not available when we just use
> > compiler flag -mvpclmulqdq. But it should according to Intel SDM.
> >
> > Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to
> > add dependency between them.
> >
> > Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so
> > VPCLMULQDQ should imply AVX.
> >
> > Tested on x86_64-pc-linux-gnu. Ok for trunk?
> >
> > BRs,
> > Haochen
> >
> > gcc/ChangeLog:
> >
> >         * common/config/i386/i386-common.cc
> >         (OPTION_MASK_ISA_VPCLMULQDQ_SET):
> >         Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET.
> >         (OPTION_MASK_ISA_AVX_UNSET):
> >         Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET.
> >         (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto.
> >         * config/i386/i386.md (vpclmulqdqvl): New.
> >         * config/i386/sse.md (pclmulqdq): Add evex encoding.
> >         * config/i386/vpclmulqdqintrin.h: Remove redudant avx target
> >         push.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm.
> > ---
> >  gcc/common/config/i386/i386-common.cc      |  9 ++++++---
> >  gcc/config/i386/i386.md                    |  4 +++-
> >  gcc/config/i386/sse.md                     | 11 ++++++-----
> >  gcc/config/i386/vpclmulqdqintrin.h         |  4 ++--
> >  gcc/testsuite/gcc.target/i386/vpclmulqdq.c |  3 +++
> >  5 files changed, 20 insertions(+), 11 deletions(-)
> >
> > diff --git a/gcc/common/config/i386/i386-common.cc 
> > b/gcc/common/config/i386/i386-common.cc
> > index 315db854862..c7954da8e34 100644
> > --- a/gcc/common/config/i386/i386-common.cc
> > +++ b/gcc/common/config/i386/i386-common.cc
> > @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3.  If not see
> >  #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
> >  #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
> >  #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
> > -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
> > +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \
> > +  (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \
> > +   | OPTION_MASK_ISA_AVX_SET)
> >  #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
> >  #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
> >  #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
> > @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #define OPTION_MASK_ISA_AVX_UNSET \
> >    (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
> >     | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
> > -   | OPTION_MASK_ISA_AVX2_UNSET )
> > +   | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
> >  #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
> >  #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
> >  #define OPTION_MASK_ISA_XSAVE_UNSET \
> > @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3.  If not see
> >
> >  #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
> >  #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
> > -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
> > +#define OPTION_MASK_ISA_PCLMUL_UNSET \
> > +  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET)
> >  #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
> >  #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
> >  #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index ed689b044c3..acc994226e7 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -841,7 +841,7 @@
> >                     
> > avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
> >                     avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
> >                     
> > avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
> > -                   avx512ifmavl,avxneconvert,avx512bf16vl"
> > +                   avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl"
> >    (const_string "base"))
> >
> >  ;; Define instruction set of MMX instructions
> > @@ -903,6 +903,8 @@
> >          (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
> >          (eq_attr "isa" "avx512bf16vl")
> >            (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
> > +        (eq_attr "isa" "vpclmulqdqvl")
> > +          (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
> >
> >          (eq_attr "mmx_isa" "native")
> >            (symbol_ref "!TARGET_MMX_WITH_SSE")
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 26812ab6106..33e281901cf 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -25195,20 +25195,21 @@
> >     (set_attr "mode" "TI")])
> >
> >  (define_insn "pclmulqdq"
> > -  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
> > -       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
> > -                     (match_operand:V2DI 2 "vector_operand" "xBm,xm")
> > +  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
> > +       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
> > +                     (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
> Just change x to Yv instead of introducing a new alternative.
Looks like the even encoding needs both vpclmulqdq and avx512vl, so i
take back the upper comments, the patch LGTM.
> Others LGTM.
> >                       (match_operand:SI 3 "const_0_to_255_operand")]
> >                      UNSPEC_PCLMUL))]
> >    "TARGET_PCLMUL"
> >    "@
> >     pclmulqdq\t{%3, %2, %0|%0, %2, %3}
> > +   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
> >     vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> > -  [(set_attr "isa" "noavx,avx")
> > +  [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
> >     (set_attr "type" "sselog1")
> >     (set_attr "prefix_extra" "1")
> >     (set_attr "length_immediate" "1")
> > -   (set_attr "prefix" "orig,vex")
> > +   (set_attr "prefix" "orig,vex,evex")
> >     (set_attr "mode" "TI")])
> >
> >  (define_expand "avx_vzeroall"
> > diff --git a/gcc/config/i386/vpclmulqdqintrin.h 
> > b/gcc/config/i386/vpclmulqdqintrin.h
> > index ba93fc4ff9c..2c83b6037a0 100644
> > --- a/gcc/config/i386/vpclmulqdqintrin.h
> > +++ b/gcc/config/i386/vpclmulqdqintrin.h
> > @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const 
> > int __C)
> >  #pragma GCC pop_options
> >  #endif /* __DISABLE_VPCLMULQDQF__ */
> >
> > -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__)
> > +#if !defined(__VPCLMULQDQ__)
> >  #pragma GCC push_options
> > -#pragma GCC target("vpclmulqdq,avx")
> > +#pragma GCC target("vpclmulqdq")
> >  #define __DISABLE_VPCLMULQDQ__
> >  #endif /* __VPCLMULQDQ__ */
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c 
> > b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > index d93f776803f..27b2fd71ea4 100644
> > --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c
> > @@ -2,16 +2,19 @@
> >  /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */
> >  /* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> > \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[
> >  \\t\]+#)" 1 } } */
> >  /* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> > \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[
> >  \\t\]+#)" 1 } } */
> > +/* { dg-final { scan-assembler-times "vpclmulqdq\[ 
> > \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[
> >  \\t\]+#)" 1 } } */
> >
> >  #include <x86intrin.h>
> >
> >  volatile __m512i x1, x2;
> >  volatile __m256i x3, x4;
> > +volatile __m128i x5, x6;
> >
> >  void extern
> >  avx512vl_test (void)
> >  {
> >      x1 = _mm512_clmulepi64_epi128(x1, x2, 3);
> >      x3 = _mm256_clmulepi64_epi128(x3, x4, 3);
> > +    x5 = _mm_clmulepi64_si128(x5, x6, 3);
> >  }
> >
> > --
> > 2.31.1
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

Reply via email to