On Thu, May 10, 2018 at 7:47 PM, Uros Bizjak <ubiz...@gmail.com> wrote:

> Please find attached the patch that fixes all the issues (plus some
> whitespace fixes). It is tested and generates acceptable code. Please
> update the ChangeLog (do not use past tense in the ChangeLog entry!)
> and, if it works for you, please commit the attached version.

Sure enough, I have attached the wrong version. Please find the correct
patch attached to this message.

Uros.
Index: cpuid.h
===================================================================
--- cpuid.h     (revision 260116)
+++ cpuid.h     (working copy)
@@ -98,6 +98,7 @@
 #define bit_AVX512VBMI (1 << 1)
 #define bit_PKU        (1 << 3)
 #define bit_OSPKE      (1 << 4)
+#define bit_WAITPKG    (1 << 5)
 #define bit_AVX512VBMI2        (1 << 6)
 #define bit_SHSTK      (1 << 7)
 #define bit_GFNI       (1 << 8)
Index: driver-i386.c
===================================================================
--- driver-i386.c       (revision 260116)
+++ driver-i386.c       (working copy)
@@ -424,6 +424,7 @@ const char *host_detect_local_cpu (int argc, const
   unsigned int has_avx512vnni = 0, has_vaes = 0;
   unsigned int has_vpclmulqdq = 0;
   unsigned int has_movdiri = 0, has_movdir64b = 0;
+  unsigned int has_waitpkg = 0;
 
   bool arch;
 
@@ -527,6 +528,7 @@ const char *host_detect_local_cpu (int argc, const
 
       has_shstk = ecx & bit_SHSTK;
       has_pconfig = edx & bit_PCONFIG;
+      has_waitpkg = ecx & bit_WAITPKG;
     }
 
   if (max_level >= 13)
@@ -1108,6 +1110,7 @@ const char *host_detect_local_cpu (int argc, const
       const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
       const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
       const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
+      const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
                        sse4a, cx16, sahf, movbe, aes, sha, pclmul,
                        popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1120,7 +1123,7 @@ const char *host_detect_local_cpu (int argc, const
                        avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
                        clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
                        avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
-                       avx512bitalg, movdiri, movdir64b, NULL);
+                       avx512bitalg, movdiri, movdir64b, waitpkg, NULL);
     }
 
 done:
Index: i386-builtin-types.def
===================================================================
--- i386-builtin-types.def      (revision 260116)
+++ i386-builtin-types.def      (working copy)
@@ -290,6 +290,7 @@ DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UINT64, PVOID)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED, PVOID)
+DEF_FUNCTION_TYPE (UINT8, UNSIGNED, UINT64)
 DEF_FUNCTION_TYPE (INT, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUNSIGNED)
 DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: i386-c.c
===================================================================
--- i386-c.c    (revision 260116)
+++ i386-c.c    (working copy)
@@ -516,6 +516,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_fla
     def_or_undef (parse_in, "__MOVDIRI__");
   if (isa_flag2 & OPTION_MASK_ISA_MOVDIR64B)
     def_or_undef (parse_in, "__MOVDIR64B__");
+  if (isa_flag2 & OPTION_MASK_ISA_WAITPKG)
+    def_or_undef (parse_in, "__WAITPKG__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
Index: i386.c
===================================================================
--- i386.c      (revision 260116)
+++ i386.c      (working copy)
@@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_I
     { "-mmovbe",       OPTION_MASK_ISA_MOVBE },
     { "-mclzero",      OPTION_MASK_ISA_CLZERO },
     { "-mmwaitx",      OPTION_MASK_ISA_MWAITX },
-    { "-mmovdir64b",   OPTION_MASK_ISA_MOVDIR64B }
+    { "-mmovdir64b",   OPTION_MASK_ISA_MOVDIR64B },
+    { "-mwaitpkg",     OPTION_MASK_ISA_WAITPKG }
   };
   static struct ix86_target_opts isa_opts[] =
   {
@@ -3455,6 +3456,7 @@ ix86_option_override_internal (bool main_args_p,
   const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
   const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
   const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
+  const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
 
   const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
     | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -5387,6 +5389,7 @@ ix86_valid_target_attribute_inner_p (tree args, ch
     IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
     IX86_ATTR_ISA ("movdiri", OPT_mmovdiri),
     IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
+    IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -30642,6 +30645,9 @@ enum ix86_builtins
   IX86_BUILTIN_CLFLUSH,
   IX86_BUILTIN_MONITOR,
   IX86_BUILTIN_MWAIT,
+  IX86_BUILTIN_UMONITOR,
+  IX86_BUILTIN_UMWAIT,
+  IX86_BUILTIN_TPAUSE,
   IX86_BUILTIN_CLZERO,
   IX86_BUILTIN_VEC_INIT_V2SI,
   IX86_BUILTIN_VEC_INIT_V4HI,
@@ -31973,6 +31979,14 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
                VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
 
+  /* WAITPKG.  */
+  def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor",
+              VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR);
+  def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait",
+              UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT);
+  def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause",
+              UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE);
+
   /* Add FMA4 multi-arg argument instructions */
   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
     {
@@ -37048,6 +37062,82 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
       emit_insn (gen_mwaitx (op0, op1, op2));
       return 0;
 
+    case IX86_BUILTIN_UMONITOR:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      op0 = expand_normal (arg0);
+
+      op0 = ix86_zero_extend_to_Pmode (op0);
+
+      insn = (TARGET_64BIT
+             ? gen_umonitor_di (op0)
+             : gen_umonitor_si (op0));
+
+      emit_insn (insn);
+      return 0;
+
+    case IX86_BUILTIN_UMWAIT:
+    case IX86_BUILTIN_TPAUSE:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+
+      if (!REG_P (op0))
+       op0 = copy_to_mode_reg (SImode, op0);
+
+      op1 = force_reg (DImode, op1);
+
+      if (TARGET_64BIT)
+       {
+         op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
+                                    NULL, 1, OPTAB_DIRECT);
+         switch (fcode)
+           {
+           case IX86_BUILTIN_UMWAIT:
+             icode = CODE_FOR_umwait_rex64;
+             break;
+           case IX86_BUILTIN_TPAUSE:
+             icode = CODE_FOR_tpause_rex64;
+             break;
+           default:
+             gcc_unreachable ();
+           }
+
+         op2 = gen_lowpart (SImode, op2);
+         op1 = gen_lowpart (SImode, op1);
+         pat = GEN_FCN (icode) (op0, op1, op2);
+       }
+      else
+       {
+         switch (fcode)
+           {
+           case IX86_BUILTIN_UMWAIT:
+             icode = CODE_FOR_umwait;
+             break;
+           case IX86_BUILTIN_TPAUSE:
+             icode = CODE_FOR_tpause;
+             break;
+           default:
+             gcc_unreachable ();
+           }
+         pat = GEN_FCN (icode) (op0, op1);
+       }
+
+      if (!pat)
+       return 0;
+
+      emit_insn (pat);
+
+      if (target == 0
+         || !register_operand (target, QImode))
+       target = gen_reg_rtx (QImode);
+
+      pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
+                       const0_rtx);
+      emit_insn (gen_rtx_SET (target, pat));
+
+      return target;
+
     case IX86_BUILTIN_CLZERO:
       arg0 = CALL_EXPR_ARG (exp, 0);
       op0 = expand_normal (arg0);
Index: i386.h
===================================================================
--- i386.h      (revision 260116)
+++ i386.h      (working copy)
@@ -189,6 +189,8 @@ see the files COPYING3 and COPYING.RUNTIME respect
 #define TARGET_MOVDIRI_P(x) TARGET_ISA_MOVDIRI_P(x)
 #define TARGET_MOVDIR64B       TARGET_ISA_MOVDIR64B
 #define TARGET_MOVDIR64B_P(x) TARGET_ISA_MOVDIR64B_P(x)
+#define TARGET_WAITPKG TARGET_ISA_WAITPKG
+#define TARGET_WAITPKG_P(x)    TARGET_ISA_WAITPKG_P(x)
 
 #define TARGET_LP64    TARGET_ABI_64
 #define TARGET_LP64_P(x)       TARGET_ABI_64_P(x)
Index: i386.md
===================================================================
--- i386.md     (revision 260116)
+++ i386.md     (working copy)
@@ -289,8 +289,15 @@
   UNSPECV_WRUSS
   UNSPECV_SETSSBSY
   UNSPECV_CLRSSBSY
+
+  ;; For MOVDIRI and MOVDIR64B support
   UNSPECV_MOVDIRI
   UNSPECV_MOVDIR64B
+
+  ;; For WAITPKG support
+  UNSPECV_UMWAIT
+  UNSPECV_UMONITOR
+  UNSPECV_TPAUSE
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -20960,22 +20967,72 @@
   "wbnoinvd"
   [(set_attr "type" "other")])
 
+;; MOVDIRI and MOVDIR64B
+
 (define_insn "movdiri<mode>"
-  [(unspec_volatile:SWI48[(match_operand:SWI48 0 "memory_operand" "m")
-       (match_operand:SWI48 1 "register_operand" "r")]
-                  UNSPECV_MOVDIRI)]
+  [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m")
+                          (match_operand:SWI48 1 "register_operand" "r")]
+                         UNSPECV_MOVDIRI)]
   "TARGET_MOVDIRI"
   "movdiri\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
 (define_insn "movdir64b_<mode>"
-  [(unspec_volatile:XI[(match_operand:P 0 "register_operand" "r")
-       (match_operand:XI 1 "memory_operand")]
-                UNSPECV_MOVDIR64B)]
+  [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r")
+                       (match_operand:XI 1 "memory_operand")]
+                      UNSPECV_MOVDIR64B)]
   "TARGET_MOVDIR64B"
   "movdir64b\t{%1, %0|%0, %1}"
   [(set_attr "type" "other")])
 
+;; WAITPKG
+
+(define_insn "umwait"
+  [(set (reg:CCC FLAGS_REG)
+       (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+                             (match_operand:DI 1 "register_operand" "A")]
+                            UNSPECV_UMWAIT))]
+  "!TARGET_64BIT && TARGET_WAITPKG"
+  "umwait\t%0"
+  [(set_attr "length" "3")])
+
+(define_insn "umwait_rex64"
+  [(set (reg:CCC FLAGS_REG)
+       (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+                             (match_operand:SI 1 "register_operand" "a")
+                             (match_operand:SI 2 "register_operand" "d")]
+                            UNSPECV_UMWAIT))]
+  "TARGET_64BIT && TARGET_WAITPKG"
+  "umwait\t%0"
+  [(set_attr "length" "3")])
+
+(define_insn "umonitor_<mode>"
+  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+                   UNSPECV_UMONITOR)]
+  "TARGET_WAITPKG"
+  "umonitor\t%0"
+  [(set (attr "length")
+     (symbol_ref ("(Pmode != word_mode) + 3")))])
+
+(define_insn "tpause"
+  [(set (reg:CCC FLAGS_REG)
+       (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+                             (match_operand:DI 1 "register_operand" "A")]
+                            UNSPECV_TPAUSE))]
+  "!TARGET_64BIT && TARGET_WAITPKG"
+  "tpause\t%0"
+  [(set_attr "length" "3")])
+
+(define_insn "tpause_rex64"
+  [(set (reg:CCC FLAGS_REG)
+       (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
+                             (match_operand:SI 1 "register_operand" "a")
+                             (match_operand:SI 2 "register_operand" "d")]
+                            UNSPECV_TPAUSE))]
+  "TARGET_64BIT && TARGET_WAITPKG"
+  "tpause\t%0"
+  [(set_attr "length" "3")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
Index: i386.opt
===================================================================
--- i386.opt    (revision 260116)
+++ i386.opt    (working copy)
@@ -1055,3 +1055,7 @@ Support MOVDIRI built-in functions and code genera
 mmovdir64b
 Target Report Mask(ISA_MOVDIR64B) Var(ix86_isa_flags2) Save
 Support MOVDIR64B built-in functions and code generation.
+
+mwaitpkg
+Target Report Mask(ISA_WAITPKG) Var(ix86_isa_flags2) Save
+Support WAITPKG built-in functions and code generation.
Index: waitpkgintrin.h
===================================================================
--- waitpkgintrin.h     (nonexistent)
+++ waitpkgintrin.h     (working copy)
@@ -0,0 +1,63 @@
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _WAITPKG_H_INCLUDED
+#define _WAITPKG_H_INCLUDED
+
+#ifndef __WAITPKG__
+#pragma GCC push_options
+#pragma GCC target("waitpkg")
+#define __DISABLE_WAITPKG__
+#endif /* __WAITPKG__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_umonitor (void *__A)
+{
+  __builtin_ia32_umonitor (__A);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_umwait (unsigned int __A, unsigned long long __B)
+{
+  return __builtin_ia32_umwait (__A, __B);
+}
+
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_tpause (unsigned int __A, unsigned long long __B)
+{
+  return __builtin_ia32_tpause (__A, __B);
+}
+
+#ifdef __DISABLE_WAITPKG__
+#undef __DISABLE_WAITPKG__
+#pragma GCC pop_options
+#endif /* __DISABLE_WAITPKG__ */
+
+#endif /* _WAITPKG_H_INCLUDED.  */
Index: x86intrin.h
===================================================================
--- x86intrin.h (revision 260116)
+++ x86intrin.h (working copy)
@@ -101,6 +101,8 @@
 
 #include <pkuintrin.h>
 
+#include <waitpkgintrin.h>
+
 #endif /* __iamcu__ */
 
 #endif /* _X86INTRIN_H_INCLUDED */

Reply via email to