On Thu, May 10, 2018 at 7:47 PM, Uros Bizjak <ubiz...@gmail.com> wrote:
> Please find attached the patch that fixes all the issues (plus some
> whitespace fixes). It is tested and generates acceptable code. Please
> update the ChangeLog (do not use past tense in the ChangeLog entry!)
> and, if it works for you, please commit the attached version.

Sure enough, I have attached the wrong version. Please find the correct patch attached to this message.

Uros.
Index: cpuid.h =================================================================== --- cpuid.h (revision 260116) +++ cpuid.h (working copy) @@ -98,6 +98,7 @@ #define bit_AVX512VBMI (1 << 1) #define bit_PKU (1 << 3) #define bit_OSPKE (1 << 4) +#define bit_WAITPKG (1 << 5) #define bit_AVX512VBMI2 (1 << 6) #define bit_SHSTK (1 << 7) #define bit_GFNI (1 << 8) Index: driver-i386.c =================================================================== --- driver-i386.c (revision 260116) +++ driver-i386.c (working copy) @@ -424,6 +424,7 @@ const char *host_detect_local_cpu (int argc, const unsigned int has_avx512vnni = 0, has_vaes = 0; unsigned int has_vpclmulqdq = 0; unsigned int has_movdiri = 0, has_movdir64b = 0; + unsigned int has_waitpkg = 0; bool arch; @@ -527,6 +528,7 @@ const char *host_detect_local_cpu (int argc, const has_shstk = ecx & bit_SHSTK; has_pconfig = edx & bit_PCONFIG; + has_waitpkg = ecx & bit_WAITPKG; } if (max_level >= 13) @@ -1108,6 +1110,7 @@ const char *host_detect_local_cpu (int argc, const const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg"; const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri"; const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b"; + const char *waitpkg = has_waitpkg ? 
" -mwaitpkg" : " -mno-waitpkg"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1120,7 +1123,7 @@ const char *host_detect_local_cpu (int argc, const avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, shstk, avx512vbmi2, avx512vnni, vaes, vpclmulqdq, - avx512bitalg, movdiri, movdir64b, NULL); + avx512bitalg, movdiri, movdir64b, waitpkg, NULL); } done: Index: i386-builtin-types.def =================================================================== --- i386-builtin-types.def (revision 260116) +++ i386-builtin-types.def (working copy) @@ -290,6 +290,7 @@ DEF_FUNCTION_TYPE (VOID, UINT64) DEF_FUNCTION_TYPE (VOID, UINT64, PVOID) DEF_FUNCTION_TYPE (VOID, UNSIGNED) DEF_FUNCTION_TYPE (VOID, UNSIGNED, PVOID) +DEF_FUNCTION_TYPE (UINT8, UNSIGNED, UINT64) DEF_FUNCTION_TYPE (INT, PUSHORT) DEF_FUNCTION_TYPE (INT, PUNSIGNED) DEF_FUNCTION_TYPE (INT, PULONGLONG) Index: i386-c.c =================================================================== --- i386-c.c (revision 260116) +++ i386-c.c (working copy) @@ -516,6 +516,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_fla def_or_undef (parse_in, "__MOVDIRI__"); if (isa_flag2 & OPTION_MASK_ISA_MOVDIR64B) def_or_undef (parse_in, "__MOVDIR64B__"); + if (isa_flag2 & OPTION_MASK_ISA_WAITPKG) + def_or_undef (parse_in, "__WAITPKG__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); Index: i386.c =================================================================== --- i386.c (revision 260116) +++ i386.c (working copy) @@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_I { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mclzero", OPTION_MASK_ISA_CLZERO }, { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, - { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B } + { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B }, + { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG } }; static struct ix86_target_opts 
isa_opts[] = { @@ -3455,6 +3456,7 @@ ix86_option_override_internal (bool main_args_p, const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6); const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7); const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8); + const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9); const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; @@ -5387,6 +5389,7 @@ ix86_valid_target_attribute_inner_p (tree args, ch IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), IX86_ATTR_ISA ("movdiri", OPT_mmovdiri), IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), + IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -30642,6 +30645,9 @@ enum ix86_builtins IX86_BUILTIN_CLFLUSH, IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT, + IX86_BUILTIN_UMONITOR, + IX86_BUILTIN_UMWAIT, + IX86_BUILTIN_TPAUSE, IX86_BUILTIN_CLZERO, IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI, @@ -31973,6 +31979,14 @@ ix86_init_mmx_sse_builtins (void) def_builtin2 (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); + /* WAITPKG. */ + def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor", + VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); + def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait", + UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); + def_builtin2 (OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause", + UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); + /* Add FMA4 multi-arg argument instructions */ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) { @@ -37048,6 +37062,82 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub emit_insn (gen_mwaitx (op0, op1, op2)); return 0; + case IX86_BUILTIN_UMONITOR: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + + op0 = ix86_zero_extend_to_Pmode (op0); + + insn = (TARGET_64BIT + ? 
gen_umonitor_di (op0) + : gen_umonitor_si (op0)); + + emit_insn (insn); + return 0; + + case IX86_BUILTIN_UMWAIT: + case IX86_BUILTIN_TPAUSE: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + + op1 = force_reg (DImode, op1); + + if (TARGET_64BIT) + { + op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), + NULL, 1, OPTAB_DIRECT); + switch (fcode) + { + case IX86_BUILTIN_UMWAIT: + icode = CODE_FOR_umwait_rex64; + break; + case IX86_BUILTIN_TPAUSE: + icode = CODE_FOR_tpause_rex64; + break; + default: + gcc_unreachable (); + } + + op2 = gen_lowpart (SImode, op2); + op1 = gen_lowpart (SImode, op1); + pat = GEN_FCN (icode) (op0, op1, op2); + } + else + { + switch (fcode) + { + case IX86_BUILTIN_UMWAIT: + icode = CODE_FOR_umwait; + break; + case IX86_BUILTIN_TPAUSE: + icode = CODE_FOR_tpause; + break; + default: + gcc_unreachable (); + } + pat = GEN_FCN (icode) (op0, op1); + } + + if (!pat) + return 0; + + emit_insn (pat); + + if (target == 0 + || !register_operand (target, QImode)) + target = gen_reg_rtx (QImode); + + pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (target, pat)); + + return target; + case IX86_BUILTIN_CLZERO: arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); Index: i386.h =================================================================== --- i386.h (revision 260116) +++ i386.h (working copy) @@ -189,6 +189,8 @@ see the files COPYING3 and COPYING.RUNTIME respect #define TARGET_MOVDIRI_P(x) TARGET_ISA_MOVDIRI_P(x) #define TARGET_MOVDIR64B TARGET_ISA_MOVDIR64B #define TARGET_MOVDIR64B_P(x) TARGET_ISA_MOVDIR64B_P(x) +#define TARGET_WAITPKG TARGET_ISA_WAITPKG +#define TARGET_WAITPKG_P(x) TARGET_ISA_WAITPKG_P(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) Index: i386.md 
=================================================================== --- i386.md (revision 260116) +++ i386.md (working copy) @@ -289,8 +289,15 @@ UNSPECV_WRUSS UNSPECV_SETSSBSY UNSPECV_CLRSSBSY + + ;; For MOVDIRI and MOVDIR64B support UNSPECV_MOVDIRI UNSPECV_MOVDIR64B + + ;; For WAITPKG support + UNSPECV_UMWAIT + UNSPECV_UMONITOR + UNSPECV_TPAUSE ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -20960,22 +20967,72 @@ "wbnoinvd" [(set_attr "type" "other")]) +;; MOVDIRI and MOVDIR64B + (define_insn "movdiri<mode>" - [(unspec_volatile:SWI48[(match_operand:SWI48 0 "memory_operand" "m") - (match_operand:SWI48 1 "register_operand" "r")] - UNSPECV_MOVDIRI)] + [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m") + (match_operand:SWI48 1 "register_operand" "r")] + UNSPECV_MOVDIRI)] "TARGET_MOVDIRI" "movdiri\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) (define_insn "movdir64b_<mode>" - [(unspec_volatile:XI[(match_operand:P 0 "register_operand" "r") - (match_operand:XI 1 "memory_operand")] - UNSPECV_MOVDIR64B)] + [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r") + (match_operand:XI 1 "memory_operand")] + UNSPECV_MOVDIR64B)] "TARGET_MOVDIR64B" "movdir64b\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) +;; WAITPKG + +(define_insn "umwait" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "A")] + UNSPECV_UMWAIT))] + "!TARGET_64BIT && TARGET_WAITPKG" + "umwait\t%0" + [(set_attr "length" "3")]) + +(define_insn "umwait_rex64" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_UMWAIT))] + "TARGET_64BIT && TARGET_WAITPKG" + "umwait\t%0" + [(set_attr "length" "3")]) + +(define_insn "umonitor_<mode>" + [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] + UNSPECV_UMONITOR)] + 
"TARGET_WAITPKG" + "umonitor\t%0" + [(set (attr "length") + (symbol_ref ("(Pmode != word_mode) + 3")))]) + +(define_insn "tpause" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "A")] + UNSPECV_TPAUSE))] + "!TARGET_64BIT && TARGET_WAITPKG" + "tpause\t%0" + [(set_attr "length" "3")]) + +(define_insn "tpause_rex64" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_TPAUSE))] + "TARGET_64BIT && TARGET_WAITPKG" + "tpause\t%0" + [(set_attr "length" "3")]) + (include "mmx.md") (include "sse.md") (include "sync.md") Index: i386.opt =================================================================== --- i386.opt (revision 260116) +++ i386.opt (working copy) @@ -1055,3 +1055,7 @@ Support MOVDIRI built-in functions and code genera mmovdir64b Target Report Mask(ISA_MOVDIR64B) Var(ix86_isa_flags2) Save Support MOVDIR64B built-in functions and code generation. + +mwaitpkg +Target Report Mask(ISA_WAITPKG) Var(ix86_isa_flags2) Save +Support WAITPKG built-in functions and code generation. Index: waitpkgintrin.h =================================================================== --- waitpkgintrin.h (nonexistent) +++ waitpkgintrin.h (working copy) @@ -0,0 +1,63 @@ +/* Copyright (C) 2018 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _X86INTRIN_H_INCLUDED +# error "Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _WAITPKG_H_INCLUDED +#define _WAITPKG_H_INCLUDED + +#ifndef __WAITPKG__ +#pragma GCC push_options +#pragma GCC target("waitpkg") +#define __DISABLE_WAITPKG__ +#endif /* __WAITPKG__ */ + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_umonitor (void *__A) +{ + __builtin_ia32_umonitor (__A); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_umwait (unsigned int __A, unsigned long long __B) +{ + return __builtin_ia32_umwait (__A, __B); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tpause (unsigned int __A, unsigned long long __B) +{ + return __builtin_ia32_tpause (__A, __B); +} + +#ifdef __DISABLE_WAITPKG__ +#undef __DISABLE_WAITPKG__ +#pragma GCC pop_options +#endif /* __DISABLE_WAITPKG__ */ + +#endif /* _WAITPKG_H_INCLUDED. */ Index: x86intrin.h =================================================================== --- x86intrin.h (revision 260116) +++ x86intrin.h (working copy) @@ -101,6 +101,8 @@ #include <pkuintrin.h> +#include <waitpkgintrin.h> + #endif /* __iamcu__ */ #endif /* _X86INTRIN_H_INCLUDED */