r339281 - [CodeGen][Timers] Enable llvm::TimePassesIsEnabled when -ftime-report is specified
Author: ctopper Date: Wed Aug 8 12:14:23 2018 New Revision: 339281 URL: http://llvm.org/viewvc/llvm-project?rev=339281&view=rev Log: [CodeGen][Timers] Enable llvm::TimePassesIsEnabled when -ftime-report is specified r330571 added a new FrontendTimesIsEnabled variable and replaced many usages of llvm::TimePassesIsEnabled, including the place that set llvm::TimePassesIsEnabled for -ftime-report. The effect of this is that -ftime-report now only contains the timers specifically referenced in CodeGenAction.cpp and none of the timers in the backend. This commit adds back the assignment, but otherwise leaves everything else unchanged. Modified: cfe/trunk/lib/CodeGen/CodeGenAction.cpp Modified: cfe/trunk/lib/CodeGen/CodeGenAction.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenAction.cpp?rev=339281&r1=339280&r2=339281&view=diff == --- cfe/trunk/lib/CodeGen/CodeGenAction.cpp (original) +++ cfe/trunk/lib/CodeGen/CodeGenAction.cpp Wed Aug 8 12:14:23 2018 @@ -127,6 +127,7 @@ namespace clang { CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { FrontendTimesIsEnabled = TimePasses; + llvm::TimePassesIsEnabled = TimePasses; } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr takeModule() { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339282 - [Builtins] Implement __builtin_clrsb to be compatible with gcc
Author: ctopper Date: Wed Aug 8 12:55:52 2018 New Revision: 339282 URL: http://llvm.org/viewvc/llvm-project?rev=339282&view=rev Log: [Builtins] Implement __builtin_clrsb to be compatible with gcc gcc defines an intrinsic called __builtin_clrsb which counts the number of extra sign bits on a number. This is equivalent to counting the number of leading zeros on a positive number or the number of leading ones on a negative number and subtracting one from the result. Since we can't count leading ones we need to invert negative numbers to count zeros. This patch will cause the builtin to be expanded inline while gcc uses a call to a function like clrsbdi2 that is implemented in libgcc. But this is similar to what we already do for popcnt. And I don't think compiler-rt supports clrsbdi2. Differential Revision: https://reviews.llvm.org/D50168 Added: cfe/trunk/test/CodeGen/builtin_clrsb.c (with props) Modified: cfe/trunk/include/clang/Basic/Builtins.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/include/clang/Basic/Builtins.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=339282&r1=339281&r2=339282&view=diff == --- cfe/trunk/include/clang/Basic/Builtins.def (original) +++ cfe/trunk/include/clang/Basic/Builtins.def Wed Aug 8 12:55:52 2018 @@ -413,6 +413,9 @@ BUILTIN(__builtin_parityll, "iULLi", "nc BUILTIN(__builtin_popcount , "iUi" , "nc") BUILTIN(__builtin_popcountl , "iULi" , "nc") BUILTIN(__builtin_popcountll, "iULLi", "nc") +BUILTIN(__builtin_clrsb , "ii" , "nc") +BUILTIN(__builtin_clrsbl , "iLi" , "nc") +BUILTIN(__builtin_clrsbll, "iLLi", "nc") // FIXME: These type signatures are not correct for targets with int != 32-bits // or with ULL != 64-bits. 
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=339282&r1=339281&r2=339282&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Aug 8 12:55:52 2018 @@ -1537,6 +1537,26 @@ RValue CodeGenFunction::EmitBuiltinExpr( return RValue::get(ComplexVal.second); } + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: { +// clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or +Value *ArgValue = EmitScalarExpr(E->getArg(0)); + +llvm::Type *ArgType = ArgValue->getType(); +Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + +llvm::Type *ResultType = ConvertType(E->getType()); +Value *Zero = llvm::Constant::getNullValue(ArgType); +Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); +Value *Inverse = Builder.CreateNot(ArgValue, "not"); +Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); +Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); +Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); +Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); +return RValue::get(Result); + } case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: Added: cfe/trunk/test/CodeGen/builtin_clrsb.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin_clrsb.c?rev=339282&view=auto == --- cfe/trunk/test/CodeGen/builtin_clrsb.c (added) +++ cfe/trunk/test/CodeGen/builtin_clrsb.c Wed Aug 8 12:55:52 2018 @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s + +int test__builtin_clrsb(int x) { +// CHECK-LABEL: test__builtin_clrsb +// CHECK: [[C:%.*]] = icmp slt i32 [[X:%.*]], 0 +// CHECK-NEXT: [[INV:%.*]] = xor i32 [[X]], -1 +// CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[INV]], i32 [[X]] +// CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SEL]], i1 false) +// 
CHECK-NEXT: [[SUB:%.*]] = sub i32 [[CTLZ]], 1 + return __builtin_clrsb(x); +} + +int test__builtin_clrsbll(long long x) { +// CHECK-LABEL: test__builtin_clrsbll +// CHECK: [[C:%.*]] = icmp slt i64 [[X:%.*]], 0 +// CHECK-NEXT: [[INV:%.*]] = xor i64 [[X]], -1 +// CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i64 [[INV]], i64 [[X]] +// CHECK-NEXT: [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[SEL]], i1 false) +// CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CTLZ]], 1 +// CHECK-NEXT: trunc i64 [[SUB]] to i32 + return __builtin_clrsbll(x); +} Propchange: cfe/trunk/test/CodeGen/builtin_clrsb.c -- svn:eol-style = native Propchange: cfe/trunk/test/CodeGen/builtin_clrsb.c -- svn:keywords = "Author Date Id Rev URL" Propchange: cfe/trunk/t
r339287 - [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr
Author: ctopper Date: Wed Aug 8 13:59:40 2018 New Revision: 339287 URL: http://llvm.org/viewvc/llvm-project?rev=339287&view=rev Log: [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr This addresses a FIXME that has existed since before clang supported the builtin. Differential Revision: https://reviews.llvm.org/D50471 Modified: cfe/trunk/lib/AST/ExprConstant.cpp cfe/trunk/test/Sema/constant-builtins-2.c Modified: cfe/trunk/lib/AST/ExprConstant.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339287&r1=339286&r2=339287&view=diff == --- cfe/trunk/lib/AST/ExprConstant.cpp (original) +++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug 8 13:59:40 2018 @@ -8117,9 +8117,15 @@ bool IntExprEvaluator::VisitBuiltinCallE case Builtin::BI__builtin_classify_type: return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E); - // FIXME: BI__builtin_clrsb - // FIXME: BI__builtin_clrsbl - // FIXME: BI__builtin_clrsbll + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: { +APSInt Val; +if (!EvaluateInteger(E->getArg(0), Val, Info)) + return false; + +return Success(Val.getBitWidth() - Val.getMinSignedBits(), E); + } case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: Modified: cfe/trunk/test/Sema/constant-builtins-2.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339287&r1=339286&r2=339287&view=diff == --- cfe/trunk/test/Sema/constant-builtins-2.c (original) +++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug 8 13:59:40 2018 @@ -132,7 +132,7 @@ char isnormal_snan [!__builtin_isnorma char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; char clz3[__builtin_clz(1 << (BITSIZE(int) - 1)) == 0 ? 
1 : -1]; -int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}} +//int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}} char clz5[__builtin_clzl(0xFL) == BITSIZE(long) - 4 ? 1 : -1]; char clz6[__builtin_clzll(0xFFLL) == BITSIZE(long long) - 8 ? 1 : -1]; char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1]; @@ -142,7 +142,7 @@ char clz9[__builtin_clzs(0xfff) == BITSI char ctz1[__builtin_ctz(1) == 0 ? 1 : -1]; char ctz2[__builtin_ctz(8) == 3 ? 1 : -1]; char ctz3[__builtin_ctz(1 << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1]; -int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}} +//int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}} char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1]; char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1]; char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 : -1]; @@ -176,6 +176,19 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ? char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1]; char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1]; char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1]; + +char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 1 : -1]; +char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1]; +char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1]; +char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1]; +char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1]; +char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1]; +char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1]; +char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1]; +char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; +char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1]; +char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1]; +char clrsb11[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 
1 : -1]; #undef BITSIZE // GCC misc stuff ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339289 - Revert r339287 "[Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr"
Author: ctopper Date: Wed Aug 8 14:21:21 2018 New Revision: 339289 URL: http://llvm.org/viewvc/llvm-project?rev=339289&view=rev Log: Revert r339287 "[Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr" This added an additional unintended change. Modified: cfe/trunk/lib/AST/ExprConstant.cpp cfe/trunk/test/Sema/constant-builtins-2.c Modified: cfe/trunk/lib/AST/ExprConstant.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339289&r1=339288&r2=339289&view=diff == --- cfe/trunk/lib/AST/ExprConstant.cpp (original) +++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug 8 14:21:21 2018 @@ -8117,15 +8117,9 @@ bool IntExprEvaluator::VisitBuiltinCallE case Builtin::BI__builtin_classify_type: return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E); - case Builtin::BI__builtin_clrsb: - case Builtin::BI__builtin_clrsbl: - case Builtin::BI__builtin_clrsbll: { -APSInt Val; -if (!EvaluateInteger(E->getArg(0), Val, Info)) - return false; - -return Success(Val.getBitWidth() - Val.getMinSignedBits(), E); - } + // FIXME: BI__builtin_clrsb + // FIXME: BI__builtin_clrsbl + // FIXME: BI__builtin_clrsbll case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: Modified: cfe/trunk/test/Sema/constant-builtins-2.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339289&r1=339288&r2=339289&view=diff == --- cfe/trunk/test/Sema/constant-builtins-2.c (original) +++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug 8 14:21:21 2018 @@ -132,7 +132,7 @@ char isnormal_snan [!__builtin_isnorma char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; char clz3[__builtin_clz(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; -//int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}} +int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}} char clz5[__builtin_clzl(0xFL) == BITSIZE(long) - 4 ?
1 : -1]; char clz6[__builtin_clzll(0xFFLL) == BITSIZE(long long) - 8 ? 1 : -1]; char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1]; @@ -142,7 +142,7 @@ char clz9[__builtin_clzs(0xfff) == BITSI char ctz1[__builtin_ctz(1) == 0 ? 1 : -1]; char ctz2[__builtin_ctz(8) == 3 ? 1 : -1]; char ctz3[__builtin_ctz(1 << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1]; -//int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}} +int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}} char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1]; char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1]; char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 : -1]; @@ -176,19 +176,6 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ? char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1]; char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1]; char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1]; - -char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 1 : -1]; -char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1]; -char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1]; -char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1]; -char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1]; -char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1]; -char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1]; -char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1]; -char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; -char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1]; -char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1]; -char clrsb11[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1]; #undef BITSIZE // GCC misc stuff ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339296 - [VFS] Remove superfluous semicolon from unittest.
Author: ctopper Date: Wed Aug 8 15:31:14 2018 New Revision: 339296 URL: http://llvm.org/viewvc/llvm-project?rev=339296&view=rev Log: [VFS] Remove superfluous semicolon from unittest. Modified: cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp Modified: cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp?rev=339296&r1=339295&r2=339296&view=diff == --- cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp (original) +++ cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp Wed Aug 8 15:31:14 2018 @@ -158,7 +158,7 @@ std::string getPosixPath(std::string S) SmallString<128> Result; llvm::sys::path::native(S, Result, llvm::sys::path::Style::posix); return Result.str(); -}; +} } // end anonymous namespace TEST(VirtualFileSystemTest, StatusQueries) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339295 - [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr
Author: ctopper Date: Wed Aug 8 15:31:12 2018 New Revision: 339295 URL: http://llvm.org/viewvc/llvm-project?rev=339295&view=rev Log: [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr This addresses a FIXME that has existed since before clang supported the builtin. This time with only reviewed changes. Differential Revision: https://reviews.llvm.org/D50471 Modified: cfe/trunk/lib/AST/ExprConstant.cpp cfe/trunk/test/Sema/constant-builtins-2.c Modified: cfe/trunk/lib/AST/ExprConstant.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339295&r1=339294&r2=339295&view=diff == --- cfe/trunk/lib/AST/ExprConstant.cpp (original) +++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug 8 15:31:12 2018 @@ -8117,9 +8117,15 @@ bool IntExprEvaluator::VisitBuiltinCallE case Builtin::BI__builtin_classify_type: return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E); - // FIXME: BI__builtin_clrsb - // FIXME: BI__builtin_clrsbl - // FIXME: BI__builtin_clrsbll + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: { +APSInt Val; +if (!EvaluateInteger(E->getArg(0), Val, Info)) + return false; + +return Success(Val.getBitWidth() - Val.getMinSignedBits(), E); + } case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: Modified: cfe/trunk/test/Sema/constant-builtins-2.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339295&r1=339294&r2=339295&view=diff == --- cfe/trunk/test/Sema/constant-builtins-2.c (original) +++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug 8 15:31:12 2018 @@ -176,6 +176,19 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ? char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1]; char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1]; char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1]; + +char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 
1 : -1]; +char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1]; +char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1]; +char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1]; +char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1]; +char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1]; +char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1]; +char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1]; +char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; +char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1]; +char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1]; +char clrsb12[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1]; #undef BITSIZE // GCC misc stuff ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339721 - [InlineAsm] Update the min-legal-vector-width function attribute based on inputs and outputs to inline assembly
Author: ctopper Date: Tue Aug 14 13:21:05 2018 New Revision: 339721 URL: http://llvm.org/viewvc/llvm-project?rev=339721&view=rev Log: [InlineAsm] Update the min-legal-vector-width function attribute based on inputs and outputs to inline assembly Summary: Another piece of my ongoing work for prefer-vector-width. min-legal-vector-width will eventually be used by the X86 backend to know whether it needs to make 512-bit types legal when prefer-vector-width=256. If the user used inline assembly that passed in/out a 512-bit register, we need to make sure 512 bits are considered legal. Otherwise we'll get an assert failure when we try to wire up the inline assembly to the rest of the code. This patch just checks the LLVM IR types to see if they are vectors and then updates the attribute based on their total width. I'm not sure if this is the best way to do this or if there's any subtlety I might have missed. So if anyone has other opinions on how to do this I'm open to suggestions. Reviewers: chandlerc, rsmith, rnk Reviewed By: rnk Subscribers: eraman, cfe-commits Differential Revision: https://reviews.llvm.org/D50678 Added: cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=339721&r1=339720&r2=339721&view=diff == --- cfe/trunk/lib/CodeGen/CGStmt.cpp (original) +++ cfe/trunk/lib/CodeGen/CGStmt.cpp Tue Aug 14 13:21:05 2018 @@ -1979,6 +1979,11 @@ void CodeGenFunction::EmitAsmStmt(const diag::err_asm_invalid_type_in_input) << OutExpr->getType() << OutputConstraint; } + + // Update largest vector width for any vector types.
+ if (auto *VT = dyn_cast(ResultRegTypes.back())) +LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); } else { ArgTypes.push_back(Dest.getAddress().getType()); Args.push_back(Dest.getPointer()); @@ -2000,6 +2005,10 @@ void CodeGenFunction::EmitAsmStmt(const Arg->getType())) Arg = Builder.CreateBitCast(Arg, AdjTy); + // Update largest vector width for any vector types. + if (auto *VT = dyn_cast(Arg->getType())) +LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2080,6 +2089,11 @@ void CodeGenFunction::EmitAsmStmt(const CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input) << InputExpr->getType() << InputConstraint; +// Update largest vector width for any vector types. +if (auto *VT = dyn_cast(Arg->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, +VT->getPrimitiveSizeInBits()); + ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); Constraints += InputConstraint; Added: cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c?rev=339721&view=auto == --- cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c (added) +++ cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c Tue Aug 14 13:21:05 2018 @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-feature +avx512f -o - | FileCheck %s + +typedef long long __m128i __attribute__ ((vector_size (16))); +typedef long long __m256i __attribute__ ((vector_size (32))); +typedef long long __m512i __attribute__ ((vector_size (64))); + +// CHECK: define <2 x i64> @testXMMout(<2 x i64>* %p) #0 +__m128i testXMMout(__m128i *p) { + __m128i xmm0; + __asm__("vmovdqu %1, %0" :"=v"(xmm0) : "m"(*(__m128i*)p)); + return xmm0; +} + +// CHECK: define <4 x i64> @testYMMout(<4 x i64>* %p) #1 +__m256i testYMMout(__m256i *p) { + 
__m256i ymm0; + __asm__("vmovdqu %1, %0" :"=v"(ymm0) : "m"(*(__m256i*)p)); + return ymm0; +} + +// CHECK: define <8 x i64> @testZMMout(<8 x i64>* %p) #2 +__m512i testZMMout(__m512i *p) { + __m512i zmm0; + __asm__("vmovdqu64 %1, %0" :"=v"(zmm0) : "m"(*(__m512i*)p)); + return zmm0; +} + +// CHECK: define void @testXMMin(<2 x i64> %xmm0, <2 x i64>* %p) #0 +void testXMMin(__m128i xmm0, __m128i *p) { + __asm__("vmovdqu %0, %1" : : "v"(xmm0), "m"(*(__m128i*)p)); +} + +// CHECK: define void @testYMMin(<4 x i64> %ymm0, <4 x i64>* %p) #1 +void testYMMin(__m256i ymm0, __m256i *p) { + __asm__("vmovdqu %0, %1" : : "v"(ymm0), "m"(*(__m256i*)p)); +} + +// CHECK: define void @testZMMin(<8 x i64> %zmm0, <8 x i64>* %p) #2 +void testZMMin(__m512i zmm0, __m51
r339843 - [X86] Remove masking from the 512-bit padds and psubs builtins. Use select builtin instead.
Author: ctopper Date: Wed Aug 15 23:20:29 2018 New Revision: 339843 URL: http://llvm.org/viewvc/llvm-project?rev=339843&view=rev Log: [X86] Remove masking from the 512-bit padds and psubs builtins. Use select builtin instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=339843&r1=339842&r2=339843&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 15 23:20:29 2018 @@ -1038,8 +1038,8 @@ TARGET_BUILTIN(__builtin_ia32_packssdw51 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") @@ -1051,8 +1051,8 @@ TARGET_BUILTIN(__builtin_ia32_pminsw512, TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=339843&r1=339842&r2=339843&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Wed Aug 15 23:20:29 2018 @@ -422,57 +422,45 @@ _mm512_maskz_packus_epi16(__mmask64 __M, static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_adds_epi8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, - __m512i __B) +_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, +(__v64qi)_mm512_adds_epi8(__A, __B), +(__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) __U); + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, +(__v64qi)_mm512_adds_epi8(__A, __B), +(__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS _mm512_adds_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_si512(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m512i __B) +_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U
r339845 - [X86] Remove masking from the 512-bit paddus/psubus builtins. Use a select builtin instead.
Author: ctopper Date: Thu Aug 16 00:28:06 2018 New Revision: 339845 URL: http://llvm.org/viewvc/llvm-project?rev=339845&view=rev Log: [X86] Remove masking from the 512-bit paddus/psubus builtins. Use a select builtin instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=339845&r1=339844&r2=339845&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Aug 16 00:28:06 2018 @@ -1040,8 +1040,8 @@ TARGET_BUILTIN(__builtin_ia32_packusdw51 TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") @@ -1053,8 +1053,8 @@ TARGET_BUILTIN(__builtin_ia32_pminuw512, TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") 
-TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512cd,avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=339845&r1=339844&r2=339845&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Aug 16 00:28:06 2018 @@ -8931,12 +8931,6 @@ static Value *EmitX86AddSubSatExpr(CodeG Res = CGF.Builder.CreateSub(Select, Ops[1]); } - if (E->getNumArgs() == 4) { // For masked intrinsics. -Value *VecSRC = Ops[2]; -Value *Mask = Ops[3]; -return EmitX86Select(CGF, Mask, Res, VecSRC); - } - return Res; } @@ -10563,15 +10557,15 @@ Value *CodeGenFunction::EmitX86BuiltinEx Load->setVolatile(true); return Load; } - case X86::BI__builtin_ia32_paddusb512_mask: - case X86::BI__builtin_ia32_paddusw512_mask: + case X86::BI__builtin_ia32_paddusb512: + case X86::BI__builtin_ia32_paddusw512: case X86::BI__builtin_ia32_paddusb256: case X86::BI__builtin_ia32_paddusw256: case X86::BI__builtin_ia32_paddusb128: case X86::BI__builtin_ia32_paddusw128: return EmitX86AddSubSatExpr(*this, E, Ops, true /* IsAddition */); - case X86::BI__builtin_ia32_psubusb512_mask: - case X86::BI__builtin_ia32_psubusw512_mask: + case X86::BI__builtin_ia32_psubusb512: + case X86::BI__builtin_ia32_psubusw512: case X86::BI__builtin_ia32_psubusb256: case X86::BI__builtin_ia32_psubusw256: case X86::BI__builtin_ia32_psubusb128: Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=339845&r1=339844&r2=339845&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Thu Aug 16 00:28:06 2018 @@ -466,57 +466,45 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_adds_epu8 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, - (__v64qi) __B, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) -1); + return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __
r331893 - [X86] Only enable the __ud2 and __int2c builtins if intrin.h has been included.
Author: ctopper Date: Wed May 9 09:57:48 2018 New Revision: 331893 URL: http://llvm.org/viewvc/llvm-project?rev=331893&view=rev Log: [X86] Only enable the __ud2 and __int2c builtins if intrin.h has been included. Differential Revision: https://reviews.llvm.org/D46332 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=331893&r1=331892&r2=331893&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed May 9 09:57:48 2018 @@ -1899,8 +1899,8 @@ TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(__int2c, "v", "nr", "intrin.h", ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(__ud2, "v", "nr", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__int2c, "v", "nhr", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__ud2, "v", "nhr", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readfsbyte, "UcUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readfsword, "UsUNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r331943 - [Builtins] Improve the IR emitted for MSVC compatible rotr/rotl builtins to match what the middle and backends understand
Author: ctopper Date: Wed May 9 17:05:13 2018 New Revision: 331943 URL: http://llvm.org/viewvc/llvm-project?rev=331943&view=rev Log: [Builtins] Improve the IR emitted for MSVC compatible rotr/rotl builtins to match what the middle and backends understand Previously we emitted something like rotl(x, n) { n &= bitwidth-1; return n != 0 ? ((x << n) | (x >> (bitwidth - n))) : x; } We use a select to avoid the undefined behavior on the (bitwidth - n) shift. The middle and backend don't really recognize this as a rotate and end up emitting a cmov or control flow because of the select. A better pattern is (x << (n & mask)) | (x >> (-n & mask)) where mask is bitwidth - 1. Fixes the main complaint in PR37387. There's still some work to be done if the user writes that sequence directly on a short or char where type promotion rules can prevent it from being recognized. The builtin is emitting direct IR with unpromoted types so that isn't a problem for it. Differential Revision: https://reviews.llvm.org/D46656 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331943&r1=331942&r2=331943&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May 9 17:05:13 2018 @@ -1409,20 +1409,14 @@ RValue CodeGenFunction::EmitBuiltinExpr( llvm::Type *ArgType = Val->getType(); Shift = Builder.CreateIntCast(Shift, ArgType, false); -unsigned ArgWidth = cast(ArgType)->getBitWidth(); -Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); -Value *ArgZero = llvm::Constant::getNullValue(ArgType); - +unsigned ArgWidth = ArgType->getIntegerBitWidth(); Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); -Shift = Builder.CreateAnd(Shift, Mask); -Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift); - -Value *RightShifted = Builder.CreateLShr(Val, Shift); -Value 
*LeftShifted = Builder.CreateShl(Val, LeftShift); -Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); -Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); -Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); +Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask); +Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); +Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); +Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); +Value *Result = Builder.CreateOr(LeftShifted, RightShifted); return RValue::get(Result); } case Builtin::BI_rotl8: @@ -1435,20 +1429,14 @@ RValue CodeGenFunction::EmitBuiltinExpr( llvm::Type *ArgType = Val->getType(); Shift = Builder.CreateIntCast(Shift, ArgType, false); -unsigned ArgWidth = cast(ArgType)->getBitWidth(); -Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth); -Value *ArgZero = llvm::Constant::getNullValue(ArgType); - +unsigned ArgWidth = ArgType->getIntegerBitWidth(); Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); -Shift = Builder.CreateAnd(Shift, Mask); -Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift); - -Value *LeftShifted = Builder.CreateShl(Val, Shift); -Value *RightShifted = Builder.CreateLShr(Val, RightShift); -Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted); -Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero); -Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated); +Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask); +Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); +Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); +Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); +Value *Result = Builder.CreateOr(LeftShifted, RightShifted); return RValue::get(Result); } case Builtin::BI__builtin_unpredictable: { Modified: cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c?rev=331943&r1=331942&r2=331943&view=diff == --- cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c (original) +++ cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c Wed May 9 17:05:13 2018 @@ -30,13 +30,12 @@ unsigned char test_rotl8(unsigned char v return _rotl8(value, shift); } // CHECK: i8 @test_rotl8 -// CHECK: [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7 -// CHECK: [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]] -// CHECK: [[HIGH:%[0-9]+]] = shl i8 [[VALUE:%[0-9]+]], [[SHIFT]] -// CHECK: [[LOW:%[0-9]+]] = lshr i8 [[VALUE]], [[NEGSHIFT]] -// CHECK: [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]] -// CHECK: [[ISZERO:%
r331958 - [X86] Change the implementation of scalar masked load/store intrinsics to not use a 512-bit intermediate vector.
Author: ctopper Date: Wed May 9 22:43:43 2018 New Revision: 331958 URL: http://llvm.org/viewvc/llvm-project?rev=331958&view=rev Log: [X86] Change the implementation of scalar masked load/store intrinsics to not use a 512-bit intermediate vector. This is unnecessary for AVX512VL supporting CPUs like SKX. We can just emit a 128-bit masked load/store here no matter what. The backend will widen it to 512-bits on KNL CPUs. Fixes the frontend portion of PR37386. Need to fix the backend to optimize the new sequences well. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=331958&r1=331957&r2=331958&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed May 9 22:43:43 2018 @@ -1523,10 +1523,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmps TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl") 
TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl") @@ -1543,10 +1543,10 @@ TARGET_BUILTIN(__builtin_ia32_storedquhi TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331958&r1=331957&r2=331958&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May 9 22:43:43 2018 @@ -8735,7 +8735,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_storess128_mask: case X86::BI__builtin_ia32_storesd128_mask: { -return EmitX86MaskedStore(*this, Ops, 16); +return EmitX86MaskedStore(*this, Ops, 1); } case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: @@ -8819,7 +8819,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: -return EmitX86MaskedLoad(*this, Ops, 16); +return EmitX86MaskedLoad(*this, Ops, 1); case 
X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=331958&r1=331957&r2=331958&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 9 22:43:43 2018 @@ -9091,17 +9091,13 @@ _mm_maskz_move_sd (__mmask8 __U, __m128d static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) { - __builtin_ia32_storess128_mask ((__v16sf *)__W, -(__v16sf) _mm512_castps128_ps512(__A), -(__mmask16) __U & (__mmask16)1
r332108 - [X86] Fix the file header name on fmaintrin.h
Author: ctopper Date: Fri May 11 10:37:40 2018 New Revision: 332108 URL: http://llvm.org/viewvc/llvm-project?rev=332108&view=rev Log: [X86] Fix the file header name on fmaintrin.h Modified: cfe/trunk/lib/Headers/fmaintrin.h Modified: cfe/trunk/lib/Headers/fmaintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fmaintrin.h?rev=332108&r1=332107&r2=332108&view=diff == --- cfe/trunk/lib/Headers/fmaintrin.h (original) +++ cfe/trunk/lib/Headers/fmaintrin.h Fri May 11 10:37:40 2018 @@ -1,4 +1,4 @@ -/*=== fma4intrin.h - FMA4 intrinsics ---=== +/*=== fmaintrin.h - FMA intrinsics -=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332203 - [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss.
Author: ctopper Date: Sun May 13 16:03:30 2018 New Revision: 332203 URL: http://llvm.org/viewvc/llvm-project?rev=332203&view=rev Log: [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss. We can use direct C code for these that will use uitofp and insertelement instructions. For the versions that take an explicit rounding mode we can't do this. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332203&r1=332202&r2=332203&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 13 16:03:30 2018 @@ -1840,7 +1840,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtw2mask2 TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2sd32, "V2dV2dUi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332203&r1=332202&r2=332203&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 16:03:30 2018 @@ -9521,7 +9521,8 @@ _mm_maskz_cvtss_sd (__mmask8 __U, __m128 static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu32_sd (__m128d __A, unsigned __B) { - return (__m128d) __builtin_ia32_cvtusi2sd32 
((__v2df) __A, __B); + __A[0] = __B; + return __A; } #ifdef __x86_64__ @@ -9532,8 +9533,8 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { - return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif @@ -9544,8 +9545,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned lon static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu32_ss (__m128 __A, unsigned __B) { - return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, -_MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #ifdef __x86_64__ @@ -9556,8 +9557,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { - return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, -_MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332203&r1=332202&r2=332203&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 16:03:30 2018 @@ -7007,7 +7007,8 @@ __m128d test_mm_maskz_cvt_roundss_sd( __ __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) { // CHECK-LABEL: @test_mm_cvtu32_sd - // CHECK: @llvm.x86.avx512.cvtusi2sd + // CHECK: uitofp i32 %{{.*}} to double + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu32_sd(__A, __B); } @@ -7020,7 +7021,8 @@ __m128d test_mm_cvt_roundu64_sd(__m128d __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) { // CHECK-LABEL: @test_mm_cvtu64_sd - // CHECK: @llvm.x86.avx512.cvtusi642sd + // CHECK: uitofp i64 %{{.*}} to double + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu64_sd(__A, __B); } #endif @@ -7033,7 +7035,8 @@ __m128 
test_mm_cvt_roundu32_ss(__m128 __ __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) { // CHECK-LABEL: @test_mm_cvtu32_ss - // CHECK: @llvm.x86.avx512.cvtusi2ss + // CHECK: uitofp i32 %{{.*}} to float + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtu32_ss(__A, __B); } @@ -7046,7 +7049,8 @@ __m128 test_mm_cvt_roundu64_ss(__m128 __ __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) { // CHECK-LABEL: @test_mm_cvtu64_ss - // CHECK: @llvm.x86.avx512.cvtusi642ss + // CHECK: uitofp i64 %{{.*}} to float + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 retu
r332210 - [X86] Use __builtin_convertvector to implement _mm512_cvtps_pd.
Author: ctopper Date: Sun May 13 21:05:06 2018 New Revision: 332210 URL: http://llvm.org/viewvc/llvm-project?rev=332210&view=rev Log: [X86] Use __builtin_convertvector to implement _mm512_cvtps_pd. If we're using default rounding mode we can let __builtin_convertvector generate an fpextend. This matches 128 and 256 bit. If we're using the version that takes an explicit rounding mode argument we would need to look at the immediate to see if it's CUR_DIRECTION. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332210&r1=332209&r2=332210&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 21:05:06 2018 @@ -9311,11 +9311,7 @@ _mm512_maskz_expand_epi32 (__mmask16 __U static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtps_pd (__m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, -(__v8df) -_mm512_undefined_pd (), -(__mmask8) -1, -_MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); } static __inline__ __m512d __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332210&r1=332209&r2=332210&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 21:05:06 2018 @@ -6630,14 +6630,14 @@ __m512d test_mm512_maskz_cvt_roundps_pd( __m512d test_mm512_cvtps_pd(__m256 __A) { // CHECK-LABEL: @test_mm512_cvtps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // CHECK: fpext <8 x float> %{{.*}} to <8 x double> return _mm512_cvtps_pd(__A); } __m512d test_mm512_cvtpslo_pd(__m512 __A) { // CHECK-LABEL: @test_mm512_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> - // CHECK: 
@llvm.x86.avx512.mask.cvtps2pd.512 + // CHECK: fpext <8 x float> %{{.*}} to <8 x double> return _mm512_cvtpslo_pd(__A); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332213 - [X86] Use select instruction and fpextend in the implementation of _mm512_mask_cvtps_pd and _mm512_maskz_cvtps_pd.
Author: ctopper Date: Sun May 13 21:57:46 2018 New Revision: 332213 URL: http://llvm.org/viewvc/llvm-project?rev=332213&view=rev Log: [X86] Use select instrution and fpextend in the implementation of _mm512_mask_cvtps_pd and _mm512_maskz_cvtps_pd. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332213&r1=332212&r2=332213&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 21:57:46 2018 @@ -9317,20 +9317,17 @@ _mm512_cvtps_pd (__m256 __A) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, -(__v8df) __W, -(__mmask8) __U, -_MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtps_pd(__A), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) { - return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, -(__v8df) -_mm512_setzero_pd (), -(__mmask8) __U, -_MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_cvtps_pd(__A), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332213&r1=332212&r2=332213&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 21:57:46 2018 @@ -6643,20 +6643,23 @@ __m512d test_mm512_cvtpslo_pd(__m512 __A __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm512_mask_cvtps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // CHECK: fpext <8 x 
float> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_cvtps_pd(__W, __U, __A); } __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) { // CHECK-LABEL: @test_mm512_mask_cvtpslo_pd // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x i32> - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // CHECK: fpext <8 x float> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_cvtpslo_pd(__W, __U, __A); } __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm512_maskz_cvtps_pd - // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + // CHECK: fpext <8 x float> %{{.*}} to <8 x double> + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_cvtps_pd(__U, __A); } __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332266 - [X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins.
Author: ctopper Date: Mon May 14 10:50:40 2018 New Revision: 332266 URL: http://llvm.org/viewvc/llvm-project?rev=332266&view=rev Log: [X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins. As long as the destination type is a 256 or 128 bit vector with the same number of elements we can use __builtin_convertvector to directly generate trunc IR instruction which will be handled natively by the backend. Differential Revision: https://reviews.llvm.org/D46742 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332266&r1=332265&r2=332266&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 14 10:50:40 2018 @@ -1355,7 +1355,6 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") @@ -1397,7 +1396,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovswb256 TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, 
"V16cV16sV16cUs", "nc", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovwb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") @@ -1719,16 +1717,12 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw12 TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") @@ -1738,7 +1732,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovdb256m TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", 
"n", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") @@ -1746,7 +1739,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovqb256_ TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512
r332322 - [X86] Revert part of r332266: Use __builtin_convertvector to replace some of the avx512 truncate builtins.
Author: ctopper Date: Mon May 14 20:17:52 2018 New Revision: 332322 URL: http://llvm.org/viewvc/llvm-project?rev=332322&view=rev Log: [X86] Revert part of r332266: Use __builtin_convertvector to replace some of the avx512 truncate builtins. The masking doesn't work right in the backend for the ones that produce byte or word elements without avx512bw. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332322&r1=332321&r2=332322&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 14 20:17:52 2018 @@ -1717,12 +1717,15 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw12 TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, 
"vV8s*V8LLiUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") @@ -1732,6 +1735,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovdb256m TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332322&r1=332321&r2=332322&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Mon May 14 20:17:52 2018 @@ -7607,17 +7607,16 @@ _mm512_cvtepi32_epi8 (__m512i __A) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { - return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M, - (__v16qi)_mm512_cvtepi32_epi8(__A), - (__v16qi)__O); + return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, + (__v16qi) __O, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) { - return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M, - (__v16qi)_mm512_cvtepi32_epi8(__A), - (__v16qi)_mm_setzero_si128()); + return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, + (__v16qi) _mm_setzero_si128 (), + __M); } static __inline__ void __DEFAULT_FN_ATTRS @@ -7635,17 +7634,16 @@ _mm512_cvtepi32_epi16 (__m512i __A) static __inline__ __m256i 
__DEFAULT_FN_ATTRS _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, - (__v16hi)_mm512_cvtepi32_epi16(__A), - (__v16hi)__O); + return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, + (__v16hi) __O, __M); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) { - return (__m256i)__builtin_ia32_selectw_256((__mmask16)__
r332738 - [X86] Fix a bad cast from mask16 to mask8 in _mm256_mask_cvtepi16_epi8 introduced in r332266.
Author: ctopper Date: Fri May 18 10:18:46 2018 New Revision: 332738 URL: http://llvm.org/viewvc/llvm-project?rev=332738&view=rev Log: [X86] Fix a bad cast from mask16 to mask8 in _mm256_mask_cvtepi16_epi8 introduced in r332266. Modified: cfe/trunk/lib/Headers/avx512vlbwintrin.h Modified: cfe/trunk/lib/Headers/avx512vlbwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbwintrin.h?rev=332738&r1=332737&r2=332738&view=diff == --- cfe/trunk/lib/Headers/avx512vlbwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlbwintrin.h Fri May 18 10:18:46 2018 @@ -1556,14 +1556,14 @@ _mm256_cvtepi16_epi8 (__m256i __A) { static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { - return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M, + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { - return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M, + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)_mm_setzero_si128()); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332825 - [X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.
Author: ctopper Date: Sun May 20 16:34:10 2018 New Revision: 332825 URL: http://llvm.org/viewvc/llvm-project?rev=332825&view=rev Log: [X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead. Someday maybe we'll use selects for all the builtins. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vbmiintrin.h cfe/trunk/lib/Headers/avx512vbmivlintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vbmi-builtins.c cfe/trunk/test/CodeGen/avx512vbmivl-builtin.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332825&r1=332824&r2=332825&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 20 16:34:10 2018 @@ -1766,18 +1766,18 @@ TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_ TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsf512_mask, "V16fV16fV16iV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsi512_mask, "V16iV16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarqi512_mask, 
"V64cV64cV64cV64cULLi", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_permvarqi128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarqi256_mask, "V32cV32cV32cV32cUi", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi128_mask, "V8sV8sV8sV8sUc", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "nc", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "nc", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "nc", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "nc", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "nc", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "nc", "avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "nc", "avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "nc", "avx512dq,avx512vl") Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=332825&r1=332824&r2=332825&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun May 20 16:34:10 2018 @@ -2034,30 +2034,25 @@ _mm512_maskz_broadcastw_epi16 (__mmask32 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, - (__v32hi) __A, - (__v32hi) _mm512_undefined_epi32 (), - (__mmask32) -1); + return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_p
r332829 - [X86] Remove some unused builtins.
Author: ctopper Date: Sun May 20 20:36:57 2018 New Revision: 332829 URL: http://llvm.org/viewvc/llvm-project?rev=332829&view=rev Log: [X86] Remove some unused builtins. These were upgraded to native shufflevectors months ago. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332829&r1=332828&r2=332829&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 20 20:36:57 2018 @@ -1616,14 +1616,6 @@ TARGET_BUILTIN(__builtin_ia32_pternlogq1 TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_mask, "V16fV16fV16fIiV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_mask, "V8dV8dV8dIiV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_mask, "V16iV16iV16iIiV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256_mask, "V8fV8fV8fIiV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256_mask, "V4dV4dV4dIiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256_mask, "V8iV8iV8iIiV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") Modified: 
cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=332829&r1=332828&r2=332829&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Sun May 20 20:36:57 2018 @@ -2574,10 +2574,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u i = 1; l = 0; u = 7; break; case X86::BI__builtin_ia32_sha1rnds4: - case X86::BI__builtin_ia32_shuf_f32x4_256_mask: - case X86::BI__builtin_ia32_shuf_f64x2_256_mask: - case X86::BI__builtin_ia32_shuf_i32x4_256_mask: - case X86::BI__builtin_ia32_shuf_i64x2_256_mask: i = 2; l = 0; u = 3; break; case X86::BI__builtin_ia32_vpermil2pd: @@ -2696,10 +2692,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_palignr512_mask: case X86::BI__builtin_ia32_vcomisd: case X86::BI__builtin_ia32_vcomiss: - case X86::BI__builtin_ia32_shuf_f32x4_mask: - case X86::BI__builtin_ia32_shuf_f64x2_mask: - case X86::BI__builtin_ia32_shuf_i32x4_mask: - case X86::BI__builtin_ia32_shuf_i64x2_mask: case X86::BI__builtin_ia32_dbpsadbw128_mask: case X86::BI__builtin_ia32_dbpsadbw256_mask: case X86::BI__builtin_ia32_dbpsadbw512_mask: ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332830 - [X86] Remove some preprocessor feature checks from intrinsic headers
Author: ctopper Date: Sun May 20 23:07:49 2018 New Revision: 332830 URL: http://llvm.org/viewvc/llvm-project?rev=332830&view=rev Log: [X86] Remove some preprocessor feature checks from intrinsic headers Summary: These look to be a couple things that weren't removed when we switched to target attribute. The popcnt makes including just smmintrin.h also include popcntintrin.h. The popcnt file itself already contains target attributes. The prefetch ones are just wrappers around __builtin_prefetch which we have graceful fallbacks for in the backend if the exact instruction isn't available. So there's no reason to hide them. And it makes them available in functions that have the right target attribute but not a -march command line flag. Reviewers: echristo, RKSimon, spatel, DavidKreitzer Reviewed By: echristo Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D47029 Modified: cfe/trunk/lib/Headers/prfchwintrin.h cfe/trunk/lib/Headers/smmintrin.h Modified: cfe/trunk/lib/Headers/prfchwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/prfchwintrin.h?rev=332830&r1=332829&r2=332830&view=diff == --- cfe/trunk/lib/Headers/prfchwintrin.h (original) +++ cfe/trunk/lib/Headers/prfchwintrin.h Sun May 20 23:07:49 2018 @@ -28,7 +28,6 @@ #ifndef __PRFCHWINTRIN_H #define __PRFCHWINTRIN_H -#if defined(__PRFCHW__) || defined(__3dNOW__) /// Loads a memory sequence containing the specified memory address into ///all data cache levels. The cache-coherency state is set to exclusive. 
///Data can be read from and written to the cache line without additional @@ -66,6 +65,5 @@ _m_prefetchw(void *__P) { __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } -#endif #endif /* __PRFCHWINTRIN_H */ Modified: cfe/trunk/lib/Headers/smmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=332830&r1=332829&r2=332830&view=diff == --- cfe/trunk/lib/Headers/smmintrin.h (original) +++ cfe/trunk/lib/Headers/smmintrin.h Sun May 20 23:07:49 2018 @@ -2458,8 +2458,6 @@ _mm_crc32_u64(unsigned long long __C, un #undef __DEFAULT_FN_ATTRS -#ifdef __POPCNT__ #include -#endif #endif /* __SMMINTRIN_H */ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332882 - [X86] Use __builtin_convertvector to implement some of the packed integer to packed float conversion intrinsics.
Author: ctopper Date: Mon May 21 13:19:17 2018 New Revision: 332882 URL: http://llvm.org/viewvc/llvm-project?rev=332882&view=rev Log: [X86] Use __builtin_convertvector to implement some of the packed integer to packed float conversion intrinsics. I believe this is safe assuming the default FP environment. The conversion might be inexact, but it can never overflow the FP type so this shouldn't be undefined behavior for the uitofp/sitofp instructions. We already do something similar for scalar conversions. Differential Revision: https://reviews.llvm.org/D46863 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/test/CodeGen/avx-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c cfe/trunk/test/CodeGen/builtins-x86.c cfe/trunk/test/CodeGen/sse2-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332882&r1=332881&r2=332882&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 21 13:19:17 2018 @@ -320,7 +320,6 @@ TARGET_BUILTIN(__builtin_ia32_movnti, "v TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2") @@ -1200,8 +1199,6 @@ 
TARGET_BUILTIN(__builtin_ia32_cvttpd2udq TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl") @@ -1363,8 +1360,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtps2qq12 TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2pd128_mask, "V2dV2LLiV2dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2pd256_mask, "V4dV4LLiV4dUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") @@ -1375,8 +1370,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2qq1 TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd128_mask, "V2dV2LLiV2dUc", "nc", "avx512vl,avx512dq") 
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd256_mask, "V4dV4LLiV4dUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "nc", "avx512vl,avx512dq") Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=332882&r1=332881&r2=332882&view=diff == --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Mon May 21 13:19:17 2018 @@ -361,26 +361,21 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, static __inline__ __m512d __DEFAULT_
r332891 - [X86] Remove masking from pternlog llvm intrinsics and use a select instruction instead.
Author: ctopper Date: Mon May 21 13:58:23 2018 New Revision: 332891 URL: http://llvm.org/viewvc/llvm-project?rev=332891&view=rev Log: [X86] Remove masking from pternlog llvm intrinsics and use a select instruction instead. Because the intrinsics in the headers are implemented as macros, we can't just use a select builtin and pternlog builtin. This would require one of the macro arguments to be used twice. Depending on what was passed to the macro we could expand an expression twice leading to weird behavior. We could maybe declare our local variable in the macro, but that would need to worry about name collisions. To avoid that just generate IR directly in CGBuiltin.cpp. Differential Revision: https://reviews.llvm.org/D47125 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=332891&r1=332890&r2=332891&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon May 21 13:58:23 2018 @@ -8445,6 +8445,37 @@ static Value *EmitX86Muldq(CodeGenFuncti return CGF.Builder.CreateMul(LHS, RHS); } +// Emit a masked pternlog intrinsic. This only exists because the header has to +// use a macro and we aren't able to pass the input argument to a pternlog +// builtin and a select builtin without evaluating it twice. 
+static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, + ArrayRef Ops) { + llvm::Type *Ty = Ops[0]->getType(); + + unsigned VecWidth = Ty->getPrimitiveSizeInBits(); + unsigned EltWidth = Ty->getScalarSizeInBits(); + Intrinsic::ID IID; + if (VecWidth == 128 && EltWidth == 32) +IID = Intrinsic::x86_avx512_pternlog_d_128; + else if (VecWidth == 256 && EltWidth == 32) +IID = Intrinsic::x86_avx512_pternlog_d_256; + else if (VecWidth == 512 && EltWidth == 32) +IID = Intrinsic::x86_avx512_pternlog_d_512; + else if (VecWidth == 128 && EltWidth == 64) +IID = Intrinsic::x86_avx512_pternlog_q_128; + else if (VecWidth == 256 && EltWidth == 64) +IID = Intrinsic::x86_avx512_pternlog_q_256; + else if (VecWidth == 512 && EltWidth == 64) +IID = Intrinsic::x86_avx512_pternlog_q_512; + else +llvm_unreachable("Unexpected intrinsic"); + + Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), + Ops.drop_back()); + Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0]; + return EmitX86Select(CGF, Ops[4], Ternlog, PassThru); +} + static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy) { unsigned NumberOfElements = DstTy->getVectorNumElements(); @@ -9159,6 +9190,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_pmuldq512: return EmitX86Muldq(*this, /*IsSigned*/true, Ops); + case X86::BI__builtin_ia32_pternlogd512_mask: + case X86::BI__builtin_ia32_pternlogq512_mask: + case X86::BI__builtin_ia32_pternlogd128_mask: + case X86::BI__builtin_ia32_pternlogd256_mask: + case X86::BI__builtin_ia32_pternlogq128_mask: + case X86::BI__builtin_ia32_pternlogq256_mask: +return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); + + case X86::BI__builtin_ia32_pternlogd512_maskz: + case X86::BI__builtin_ia32_pternlogq512_maskz: + case X86::BI__builtin_ia32_pternlogd128_maskz: + case X86::BI__builtin_ia32_pternlogd256_maskz: + case X86::BI__builtin_ia32_pternlogq128_maskz: + case X86::BI__builtin_ia32_pternlogq256_maskz: 
+return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332891&r1=332890&r2=332891&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon May 21 13:58:23 2018 @@ -4494,37 +4494,41 @@ __m512i test_mm512_maskz_srlv_epi64(__mm __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.mask.pternlog.d.512 + // CHECK: @llvm.x86.avx512.pternlog.d.512 return _mm512_ternarylogic_epi32(__A, __B, __C, 4); } __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi32 - // CHECK: @llvm.x86.avx512.mask.pternlog.d.512 + // CHECK: @llvm.x86.avx512.pternlog.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_tern
r332909 - [X86] Remove a builtin that should have been removed in r332882.
Author: ctopper Date: Mon May 21 15:10:02 2018 New Revision: 332909 URL: http://llvm.org/viewvc/llvm-project?rev=332909&view=rev Log: [X86] Remove a builtin that should have been removed in r332882. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/test/CodeGen/builtins-x86.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332909&r1=332908&r2=332909&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 21 15:10:02 2018 @@ -477,7 +477,6 @@ TARGET_BUILTIN(__builtin_ia32_cmpps, "V4 TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx") Modified: cfe/trunk/test/CodeGen/builtins-x86.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=332909&r1=332908&r2=332909&view=diff == --- cfe/trunk/test/CodeGen/builtins-x86.c (original) +++ cfe/trunk/test/CodeGen/builtins-x86.c Mon May 21 15:10:02 2018 @@ -433,7 +433,6 @@ void f0() { tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7); tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0); tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0); - tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r332929 - [X86] Prevent inclusion of __wmmintrin_aes.h and __wmmintrin_pclmul.h without including wmmintrin.h
Author: ctopper Date: Mon May 21 19:02:13 2018 New Revision: 332929 URL: http://llvm.org/viewvc/llvm-project?rev=332929&view=rev Log: [X86] Prevent inclusion of __wmmintrin_aes.h and __wmmintrin_pclmul.h without including wmmintrin.h Modified: cfe/trunk/lib/Headers/__wmmintrin_aes.h cfe/trunk/lib/Headers/__wmmintrin_pclmul.h Modified: cfe/trunk/lib/Headers/__wmmintrin_aes.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_aes.h?rev=332929&r1=332928&r2=332929&view=diff == --- cfe/trunk/lib/Headers/__wmmintrin_aes.h (original) +++ cfe/trunk/lib/Headers/__wmmintrin_aes.h Mon May 21 19:02:13 2018 @@ -20,11 +20,14 @@ * *===---=== */ + +#ifndef __WMMINTRIN_H +#error "Never use <__wmmintrin_aes.h> directly; include instead." +#endif + #ifndef __WMMINTRIN_AES_H #define __WMMINTRIN_AES_H -#include - /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) Modified: cfe/trunk/lib/Headers/__wmmintrin_pclmul.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_pclmul.h?rev=332929&r1=332928&r2=332929&view=diff == --- cfe/trunk/lib/Headers/__wmmintrin_pclmul.h (original) +++ cfe/trunk/lib/Headers/__wmmintrin_pclmul.h Mon May 21 19:02:13 2018 @@ -20,6 +20,11 @@ * *===---=== */ + +#ifndef __WMMINTRIN_H +#error "Never use <__wmmintrin_pclmul.h> directly; include instead." +#endif + #ifndef __WMMINTRIN_PCLMUL_H #define __WMMINTRIN_PCLMUL_H ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333014 - [X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h
Author: ctopper Date: Tue May 22 11:54:19 2018 New Revision: 333014 URL: http://llvm.org/viewvc/llvm-project?rev=333014&view=rev Log: [X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h Intel documents the 128-bit versions as being in emmintrin.h and the 256-bit version as being in immintrin.h. This patch makes a new __emmintrin_f16c.h to hold the 128-bit versions to be included from emmintrin.h. And makes the existing f16cintrin.h contain the 256-bit versions and include it from immintrin.h with an error if it's included directly. Differential Revision: https://reviews.llvm.org/D47174 Added: cfe/trunk/lib/Headers/__emmintrin_f16c.h - copied, changed from r332998, cfe/trunk/lib/Headers/f16cintrin.h Modified: cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/f16cintrin.h cfe/trunk/lib/Headers/immintrin.h Copied: cfe/trunk/lib/Headers/__emmintrin_f16c.h (from r332998, cfe/trunk/lib/Headers/f16cintrin.h) URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__emmintrin_f16c.h?p2=cfe/trunk/lib/Headers/__emmintrin_f16c.h&p1=cfe/trunk/lib/Headers/f16cintrin.h&r1=332998&r2=333014&rev=333014&view=diff == --- cfe/trunk/lib/Headers/f16cintrin.h (original) +++ cfe/trunk/lib/Headers/__emmintrin_f16c.h Tue May 22 11:54:19 2018 @@ -1,4 +1,4 @@ -/*=== f16cintrin.h - F16C intrinsics ---=== +/*=== __emmintrin_f16c.h - F16C intrinsics -=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21,12 +21,12 @@ *===---=== */ -#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H -#error "Never use directly; include instead." +#if !defined __EMMINTRIN_H +#error "Never use <__emmintrin_f16c.h> directly; include instead." 
#endif -#ifndef __F16CINTRIN_H -#define __F16CINTRIN_H +#ifndef __EMMINTRIN_F16C_H +#define __EMMINTRIN_F16C_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ @@ -121,4 +121,4 @@ _mm_cvtph_ps(__m128i __a) #undef __DEFAULT_FN_ATTRS -#endif /* __F16CINTRIN_H */ +#endif /* __EMMINTRIN_F16C_H */ Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=333014&r1=333013&r2=333014&view=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Tue May 22 11:54:19 2018 @@ -44,7 +44,7 @@ typedef unsigned char __v16qu __attribut * appear in the interface though. */ typedef signed char __v16qs __attribute__((__vector_size__(16))); -#include +#include <__emmintrin_f16c.h> /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) Modified: cfe/trunk/lib/Headers/f16cintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/f16cintrin.h?rev=333014&r1=333013&r2=333014&view=diff == --- cfe/trunk/lib/Headers/f16cintrin.h (original) +++ cfe/trunk/lib/Headers/f16cintrin.h Tue May 22 11:54:19 2018 @@ -21,8 +21,8 @@ *===---=== */ -#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H -#error "Never use directly; include instead." +#if !defined __IMMINTRIN_H +#error "Never use directly; include instead." #endif #ifndef __F16CINTRIN_H @@ -32,63 +32,24 @@ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) -/// Converts a 16-bit half-precision float value into a 32-bit float -///value. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the VCVTPH2PS instruction. -/// -/// \param __a -///A 16-bit half-precision float value. -/// \returns The converted 32-bit float value. 
-static __inline float __DEFAULT_FN_ATTRS -_cvtsh_ss(unsigned short __a) -{ - __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; - __v4sf r = __builtin_ia32_vcvtph2ps(v); - return r[0]; -} - -/// Converts a 32-bit single-precision float value to a 16-bit -///half-precision float value. -/// -/// \headerfile -/// -/// \code -/// unsigned short _cvtss_sh(float a, const int imm); -/// \endcode -/// -/// This intrinsic corresponds to the VCVTPS2PH instruction. -/// -/// \param a -///A 32-bit single-precision float value to be converted to a 16-bit -///half-precision float value. -/// \param imm -///An immediate value controlling rounding using bits [2:0]: \n
r333020 - [X86] Add __emmintrin_f16c.h to module map and CMakeLists.
Author: ctopper Date: Tue May 22 13:19:05 2018 New Revision: 333020 URL: http://llvm.org/viewvc/llvm-project?rev=333020&view=rev Log: [X86] Add __emmintrin_f16c.h to module map and CMakeLists. I missed this in r333014 Modified: cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/module.modulemap Modified: cfe/trunk/lib/Headers/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=333020&r1=333019&r2=333020&view=diff == --- cfe/trunk/lib/Headers/CMakeLists.txt (original) +++ cfe/trunk/lib/Headers/CMakeLists.txt Tue May 22 13:19:05 2018 @@ -46,6 +46,7 @@ set(files clflushoptintrin.h clwbintrin.h emmintrin.h + __emmintrin_f16c.h f16cintrin.h float.h fma4intrin.h Modified: cfe/trunk/lib/Headers/module.modulemap URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/module.modulemap?rev=333020&r1=333019&r2=333020&view=diff == --- cfe/trunk/lib/Headers/module.modulemap (original) +++ cfe/trunk/lib/Headers/module.modulemap Tue May 22 13:19:05 2018 @@ -95,9 +95,14 @@ module _Builtin_intrinsics [system] [ext explicit module sse2 { export sse + export f16c_128 header "emmintrin.h" } +explicit f16c_128 { + header "__emmintrin_f16c.h" +} + explicit module sse3 { export sse2 header "pmmintrin.h" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333023 - [X86] Add two missing #endif directives to immintrin.h that should have been in r333014.
Author: ctopper Date: Tue May 22 13:33:04 2018 New Revision: 333023 URL: http://llvm.org/viewvc/llvm-project?rev=333023&view=rev Log: [X86] Add two missing #endif directives to immintrin.h that should have been in r333014. Modified: cfe/trunk/lib/Headers/immintrin.h Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333023&r1=333022&r2=333023&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Tue May 22 13:33:04 2018 @@ -68,9 +68,11 @@ #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__) #include +#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__) #include +#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__) #include ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333026 - [X86] Another attempt at fixing the intrinsic module map for r333014.
Author: ctopper Date: Tue May 22 13:48:20 2018 New Revision: 333026 URL: http://llvm.org/viewvc/llvm-project?rev=333026&view=rev Log: [X86] Another attempt at fixing the intrinsic module map for r333014. Modified: cfe/trunk/lib/Headers/module.modulemap Modified: cfe/trunk/lib/Headers/module.modulemap URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/module.modulemap?rev=333026&r1=333025&r2=333026&view=diff == --- cfe/trunk/lib/Headers/module.modulemap (original) +++ cfe/trunk/lib/Headers/module.modulemap Tue May 22 13:48:20 2018 @@ -99,7 +99,7 @@ module _Builtin_intrinsics [system] [ext header "emmintrin.h" } -explicit f16c_128 { +explicit module f16c_128 { header "__emmintrin_f16c.h" } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333027 - [X86] Remove mask argument from some builtins that are handled completely in CGBuiltin.cpp. Just wrap a select builtin around them in the header file instead.
Author: ctopper Date: Tue May 22 13:48:24 2018 New Revision: 333027 URL: http://llvm.org/viewvc/llvm-project?rev=333027&view=rev Log: [X86] Remove mask argument from some builtins that are handled completely in CGBuiltin.cpp. Just wrap a select builtin around them in the header file instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512cdintrin.h cfe/trunk/lib/Headers/avx512vlcdintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333027&r1=333026&r2=333027&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 22 13:48:24 2018 @@ -1102,8 +1102,8 @@ TARGET_BUILTIN(__builtin_ia32_vpconflict TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "nc", "avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512cd") TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "nc", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "nc", "avx512cd") TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "nc", "avx512vpopcntdq,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "nc", "avx512vpopcntdq,avx512vl") @@ -1550,10 +1550,10 @@ TARGET_BUILTIN(__builtin_ia32_rcp14pd128 TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", 
"nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_128_mask, "V4iV4iV4iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_256_mask, "V8iV8iV8iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "nc", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "nc", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "nc", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "nc", "avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "nc", "avx512f") @@ -1778,7 +1778,7 @@ TARGET_BUILTIN(__builtin_ia32_kortestzhi TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64cIiV64cULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc", "nc", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs", "nc", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi", "nc", "avx512bw") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333027&r1=333026&r2=333027&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue May 22 13:48:24 2018 @@ -8890,7 +8890,7 @@ Value 
*CodeGenFunction::EmitX86BuiltinEx } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: - case X86::BI__builtin_ia32_palignr512_mask: { + case X86::BI__builtin_ia32_palignr512: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); @@ -8920,15 +8920,9 @@ Value *CodeGenFunction::EmitX86BuiltinEx } } -Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], - makeArrayRef(Indices, NumElts), - "palignr"); - -// If this isn't a masked builtin, just return the align operation. -if (Ops.size() == 3) - return Align; - -return EmitX86Select(*this, Ops[4], Align, Ops[3])
r333033 - [X86] As mentioned in post-commit feedback in D47174, move the 128 bit f16c intrinsics into f16cintrin.h and remove __emmintrin_f16c.h
Author: ctopper Date: Tue May 22 15:19:19 2018 New Revision: 333033 URL: http://llvm.org/viewvc/llvm-project?rev=333033&view=rev Log: [X86] As mentioned in post-commit feedback in D47174, move the 128 bit f16c intrinsics into f16cintrin.h and remove __emmintrin_f16c.h These were included in emmintrin.h to match Intel Intrinsics Guide documentation. But this is because icc is capable of emulating them on targets that don't support F16C using library calls. Clang/LLVM doesn't have this emulation support. So it makes more sense to include them in immintrin.h instead. I've left a comment behind to hopefully deter someone from trying to move them again in the future. Removed: cfe/trunk/lib/Headers/__emmintrin_f16c.h Modified: cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/f16cintrin.h cfe/trunk/lib/Headers/module.modulemap Modified: cfe/trunk/lib/Headers/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=333033&r1=333032&r2=333033&view=diff == --- cfe/trunk/lib/Headers/CMakeLists.txt (original) +++ cfe/trunk/lib/Headers/CMakeLists.txt Tue May 22 15:19:19 2018 @@ -46,7 +46,6 @@ set(files clflushoptintrin.h clwbintrin.h emmintrin.h - __emmintrin_f16c.h f16cintrin.h float.h fma4intrin.h Removed: cfe/trunk/lib/Headers/__emmintrin_f16c.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__emmintrin_f16c.h?rev=333032&view=auto == --- cfe/trunk/lib/Headers/__emmintrin_f16c.h (original) +++ cfe/trunk/lib/Headers/__emmintrin_f16c.h (removed) @@ -1,124 +0,0 @@ -/*=== __emmintrin_f16c.h - F16C intrinsics -=== - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons 
to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - *===---=== - */ - -#if !defined __EMMINTRIN_H -#error "Never use <__emmintrin_f16c.h> directly; include instead." -#endif - -#ifndef __EMMINTRIN_F16C_H -#define __EMMINTRIN_F16C_H - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) - -/// Converts a 16-bit half-precision float value into a 32-bit float -///value. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the VCVTPH2PS instruction. -/// -/// \param __a -///A 16-bit half-precision float value. -/// \returns The converted 32-bit float value. -static __inline float __DEFAULT_FN_ATTRS -_cvtsh_ss(unsigned short __a) -{ - __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; - __v4sf r = __builtin_ia32_vcvtph2ps(v); - return r[0]; -} - -/// Converts a 32-bit single-precision float value to a 16-bit -///half-precision float value. -/// -/// \headerfile -/// -/// \code -/// unsigned short _cvtss_sh(float a, const int imm); -/// \endcode -/// -/// This intrinsic corresponds to the VCVTPS2PH instruction. -/// -/// \param a -///A 32-bit single-precision float value to be converted to a 16-bit -///half-precision float value. 
-/// \param imm -///An immediate value controlling rounding using bits [2:0]: \n -///000: Nearest \n -///001: Down \n -///010: Up \n -///011: Truncate \n -///1XX: Use MXCSR.RC for rounding -/// \returns The converted 16-bit half-precision float value. -#define _cvtss_sh(a, imm) __extension__ ({ \ - (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \ - (imm)))[0]); }) - -/// Converts a 128-bit vector containing 32-bit float values into a -///128-bit vector containing 16-bit half-precision float values. -/// -/// \headerfile -/// -/// \code -/// __m128i _mm_cvtps_ph(__m128 a, const int imm); -/// \endcode -/// -/// T
r333062 - [X86] In the floating point max reduction intrinsics, negate infinity before feeding it to set1.
Author: ctopper Date: Tue May 22 22:51:52 2018 New Revision: 333062 URL: http://llvm.org/viewvc/llvm-project?rev=333062&view=rev Log: [X86] In the floating point max reduction intrinsics, negate infinity before feeding it to set1. Previously we negated the whole vector after splatting infinity. But its better to negate the infinity before splatting. This generates IR with the negate already folded with the infinity constant. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333062&r1=333061&r2=333062&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 22 22:51:52 2018 @@ -9956,7 +9956,7 @@ _mm512_mask_reduce_max_epu64(__mmask8 __ static __inline__ double __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { - _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()), + _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(-__builtin_inf()), max_pd, d, f, pd, __M); } @@ -10099,7 +10099,7 @@ _mm512_mask_reduce_max_epu32(__mmask16 _ static __inline__ float __DEFAULT_FN_ATTRS _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { - _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f, + _mm512_mask_reduce_maxMin_32bit(__V,_mm512_set1_ps(-__builtin_inff()), max_ps, , f, ps, __M); } Modified: cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c?rev=333062&r1=333061&r2=333062&view=diff == --- cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c (original) +++ cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Tue May 22 22:51:52 2018 @@ -564,7 +564,7 @@ unsigned long test_mm512_mask_reduce_max // CHECK: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 // CHECK: 
[[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 // CHECK: [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 -// CHECK: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8 +// CHECK: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8 // CHECK: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 // CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP4]], i32 0 // CHECK: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 @@ -583,9 +583,8 @@ unsigned long test_mm512_mask_reduce_max // CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP11]], i32 7 // CHECK: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64 // CHECK: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64 -// CHECK: [[SUB_I:%.*]] = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, [[TMP12]] // CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> -// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], <8 x double> [[SUB_I]] +// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], <8 x double> [[TMP12]] // CHECK: store <8 x double> [[TMP14]], <8 x double>* [[__V_ADDR_I]], align 64 // CHECK: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 // CHECK: [[TMP16:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 @@ -1859,7 +1858,7 @@ unsigned int test_mm512_mask_reduce_max_ // CHECK: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 // CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 // CHECK: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 -// CHECK: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4 +// CHECK: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4 // CHECK: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 // CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP4]], i32 0 // CHECK: 
[[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 @@ -1894,9 +1893,8 @@ unsigned int test_mm512_mask_reduce_max_ // CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP19]], i32 15 // CHECK: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64 // CHECK: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64 -// CHECK: [[SUB_I:%.*]] = fsub <16 x float> , [[TMP20]] // CHECK: [[TMP21:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> -// CHECK: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x fl
r333064 - [X86] Undef the vector reduction helper macros when we're done with them.
Author: ctopper Date: Tue May 22 23:31:36 2018 New Revision: 333064 URL: http://llvm.org/viewvc/llvm-project?rev=333064&view=rev Log: [X86] Undef the vector reduction helper macros when we're done with them. These are implementation helper macros we shouldn't expose them to user code if we don't need to. Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333064&r1=333063&r2=333064&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 22 23:31:36 2018 @@ -9713,6 +9713,8 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M, f, d, pd); } +#undef _mm512_reduce_operator_64bit +#undef _mm512_mask_reduce_operator_64bit // Vec512 - Vector with size 512. // Operator - Can be one of following: +,*,&,| @@ -9840,6 +9842,8 @@ static __inline__ float __DEFAULT_FN_ATT _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps); } +#undef _mm512_reduce_operator_32bit +#undef _mm512_mask_reduce_operator_32bit // Used bisection method. At each step, we partition the vector with previous // step in half, and the operation is performed on its two halves. @@ -9977,6 +9981,8 @@ _mm512_mask_reduce_min_pd(__mmask8 __M, _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()), min_pd, d, f, pd, __M); } +#undef _mm512_reduce_maxMin_64bit +#undef _mm512_mask_reduce_maxMin_64bit // Vec512 - Vector with size 512. 
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example: @@ -10120,6 +10126,8 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f, ps, __M); } +#undef _mm512_reduce_maxMin_32bit +#undef _mm512_mask_reduce_maxMin_32bit #undef __DEFAULT_FN_ATTRS ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333110 - [X86] Move all Intel defined intrinsic includes into immintrin.h
Author: ctopper Date: Wed May 23 11:32:58 2018 New Revision: 333110 URL: http://llvm.org/viewvc/llvm-project?rev=333110&view=rev Log: [X86] Move all Intel defined intrinsic includes into immintrin.h This matches the Intel documentation which shows them available by importing immintrin.h. x86intrin.h also includes immintrin.h so anyone including x86intrin.h will still get them. This is different than gcc, but I don't think we were a perfect match there already. I'm unclear what gcc's policy is about how they choose which to add things to. Differential Revision: https://reviews.llvm.org/D47182 Modified: cfe/trunk/lib/Headers/cldemoteintrin.h cfe/trunk/lib/Headers/clzerointrin.h cfe/trunk/lib/Headers/immintrin.h cfe/trunk/lib/Headers/movdirintrin.h cfe/trunk/lib/Headers/pconfigintrin.h cfe/trunk/lib/Headers/ptwriteintrin.h cfe/trunk/lib/Headers/rdseedintrin.h cfe/trunk/lib/Headers/sgxintrin.h cfe/trunk/lib/Headers/waitpkgintrin.h cfe/trunk/lib/Headers/wbnoinvdintrin.h cfe/trunk/lib/Headers/x86intrin.h Modified: cfe/trunk/lib/Headers/cldemoteintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cldemoteintrin.h?rev=333110&r1=333109&r2=333110&view=diff == --- cfe/trunk/lib/Headers/cldemoteintrin.h (original) +++ cfe/trunk/lib/Headers/cldemoteintrin.h Wed May 23 11:32:58 2018 @@ -21,7 +21,7 @@ *===---=== */ -#ifndef __X86INTRIN_H +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead." #endif Modified: cfe/trunk/lib/Headers/clzerointrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clzerointrin.h?rev=333110&r1=333109&r2=333110&view=diff == --- cfe/trunk/lib/Headers/clzerointrin.h (original) +++ cfe/trunk/lib/Headers/clzerointrin.h Wed May 23 11:32:58 2018 @@ -20,7 +20,7 @@ * *===---=== */ -#ifndef __X86INTRIN_H +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead." 
#endif Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333110&r1=333109&r2=333110&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Wed May 23 11:32:58 2018 @@ -90,6 +90,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__) #include #endif @@ -339,4 +343,41 @@ _writegsbase_u64(unsigned long long __V) * whereas others are also available at all times. */ #include +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + defined(__MOVDIRI__) || defined(__MOVDIR64B__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__) +#include +#endif + #endif /* __IMMINTRIN_H */ Modified: cfe/trunk/lib/Headers/movdirintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/movdirintrin.h?rev=333110&r1=333109&r2=333110&view=diff == --- cfe/trunk/lib/Headers/movdirintrin.h (original) +++ cfe/trunk/lib/Headers/movdirintrin.h Wed May 23 11:32:58 2018 @@ -20,7 +20,7 @@ * *===---=== */ -#ifndef __X86INTRIN_H +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H #error "Never use directly; include instead." 
#endif Modified: cfe/trunk/lib/Headers/pconfigintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/pconfigintrin.h?rev=333110&r1=333109&r2=333110&view=diff == --- cfe/trunk/lib/Headers/pconfigintrin.h (original) +++ cfe/trunk/lib/Headers/pconfigintrin.h Wed May 23 11:32:58 2018 @@ -21,7 +21,7 @@ *===---=== */ -#ifndef
r333124 - [X86] Move the include of clzerointrin.h from immintrin.h back to x86intrin.h.
Author: ctopper Date: Wed May 23 14:04:26 2018 New Revision: 333124 URL: http://llvm.org/viewvc/llvm-project?rev=333124&view=rev Log: [X86] Move the include of clzerointrin.h from immintrin.h back to x86intrin.h. This is an AMD intrinsic not an Intel intrinsic so it shouldn't be in immintrin.h Modified: cfe/trunk/lib/Headers/immintrin.h cfe/trunk/lib/Headers/x86intrin.h Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333124&r1=333123&r2=333124&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Wed May 23 14:04:26 2018 @@ -347,10 +347,6 @@ _writegsbase_u64(unsigned long long __V) #include #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) -#include -#endif - #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__) #include #endif Modified: cfe/trunk/lib/Headers/x86intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/x86intrin.h?rev=333124&r1=333123&r2=333124&view=diff == --- cfe/trunk/lib/Headers/x86intrin.h (original) +++ cfe/trunk/lib/Headers/x86intrin.h Wed May 23 14:04:26 2018 @@ -60,4 +60,9 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__) +#include +#endif + + #endif /* __X86INTRIN_H */ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333211 - [X86] Fix a bad cast in _mm512_mask_abs_epi32 and _mm512_maskz_abs_epi32.
Author: ctopper Date: Thu May 24 10:32:49 2018 New Revision: 333211 URL: http://llvm.org/viewvc/llvm-project?rev=333211&view=rev Log: [X86] Fix a bad cast in _mm512_mask_abs_epi32 and _mm512_maskz_abs_epi32. Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333211&r1=333210&r2=333211&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Thu May 24 10:32:49 2018 @@ -1948,7 +1948,7 @@ _mm512_abs_epi32(__m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask8)__U, + return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)__W); } @@ -1956,7 +1956,7 @@ _mm512_mask_abs_epi32 (__m512i __W, __mm static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask8)__U, + return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_abs_epi32(__A), (__v16si)_mm512_setzero_si512()); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333318 - [X86] Mark a few more builtins const that were missed in r331814.
Author: ctopper Date: Fri May 25 15:07:43 2018 New Revision: 333318 URL: http://llvm.org/viewvc/llvm-project?rev=333318&view=rev Log: [X86] Mark a few more builtins const that were missed in r331814. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333318&r1=333317&r2=333318&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:07:43 2018 @@ -1237,9 +1237,9 @@ TARGET_BUILTIN(__builtin_ia32_pmaxsq256, TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333320 - [X86] Correct the target features on two avx512bw builtins that were incorrectly labeled as avx512f.
Author: ctopper Date: Fri May 25 15:43:20 2018 New Revision: 333320 URL: http://llvm.org/viewvc/llvm-project?rev=333320&view=rev Log: [X86] Correct the target features on two avx512bw builtins that were incorrectly labeled as avx512f. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333320&r1=333319&r2=333320&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:43:20 2018 @@ -1486,8 +1486,8 @@ TARGET_BUILTIN(__builtin_ia32_vcomisd, " TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", "avx512bw") TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333321 - [X86] Add const to another builtin that was missed from r331814.
Author: ctopper Date: Fri May 25 15:52:29 2018 New Revision: 333321 URL: http://llvm.org/viewvc/llvm-project?rev=333321&view=rev Log: [X86] Add const to another builtin that was missed from r331814. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333321&r1=333320&r2=333321&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:52:29 2018 @@ -1689,7 +1689,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovusdw12 TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333348 - [X86] Remove mask from avx512ifma builtins. Use a select instruction instead.
Author: ctopper Date: Sat May 26 11:55:26 2018 New Revision: 333348 URL: http://llvm.org/viewvc/llvm-project?rev=333348&view=rev Log: [X86] Remove mask from avx512ifma builtins. Use a select instruction instead. This reduces from 12 builtins to 6 since we no longer need a mask and maskz version. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512ifmaintrin.h cfe/trunk/lib/Headers/avx512ifmavlintrin.h cfe/trunk/test/CodeGen/avx512ifma-builtins.c cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333348&r1=333347&r2=333348&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat May 26 11:55:26 2018 @@ -1461,18 +1461,12 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64lo TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512ifma,avx512vl") 
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpermt2varqi512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpermt2varqi512_maskz, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") Modified: cfe/trunk/lib/Headers/avx512ifmaintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512ifmaintrin.h?rev=333348&r1=333347&r2=333348&view=diff == --- cfe/trunk/lib/Headers/avx512ifmaintrin.h (original) +++ cfe/trunk/lib/Headers/avx512ifmaintrin.h Sat May 26 11:55:26 2018 @@ -34,57 +34,47 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) { - return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X, - (__v8di) __Y, - (__v8di) __Z, - (__mmask8) -1); + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, +(__v8di) __Z); } static __inline__ __m512i __DEFAULT_FN_ATTRS 
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, - __m512i __Y) +_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W, - (__v8di) __X, - (__v8di) __Y, - (__mmask8) __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { - r
r333387 - [X86] Merge the 3 different flavors of masked vpermi2var/vpermt2var builtins to a single version without masking. Use select builtins with appropriate operand instead.
Author: ctopper Date: Mon May 28 20:26:38 2018 New Revision: 333387 URL: http://llvm.org/viewvc/llvm-project?rev=333387&view=rev Log: [X86] Merge the 3 different flavors of masked vpermi2var/vpermt2var builtins to a single version without masking. Use select builtins with appropriate operand instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vbmiintrin.h cfe/trunk/lib/Headers/avx512vbmivlintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vbmi-builtins.c cfe/trunk/test/CodeGen/avx512vbmivl-builtin.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333387&r1=333386&r2=333387&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 28 20:26:38 2018 @@ -969,10 +969,6 @@ TARGET_BUILTIN(__builtin_ia32_storeupd51 TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni") @@ -1092,10 +1088,6 @@ TARGET_BUILTIN(__builtin_ia32_psubsw512_ TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512bw") - TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "nc", "avx512cd,avx512vl") @@ -1123,13 +1115,6 @@ TARGET_BUILTIN(__builtin_ia32_vpshufbitq TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "nc", "avx512vl,avx512bitalg") TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "nc", "avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512bw") - TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw") @@ -1266,30 +1251,24 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv 
TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128_mask, "V2dV2dV2LLiV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256_mask, "V4dV4dV4LLiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps128_mask, "V4fV4fV4iV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps256_mask, "V8fV8fV8iV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET
r333446 - [X86] Tag some 128/256 load/store instructions as requiring avx512vl instead of avx512f.
Author: ctopper Date: Tue May 29 11:23:22 2018 New Revision: 333446 URL: http://llvm.org/viewvc/llvm-project?rev=333446&view=rev Log: [X86] Tag some 128/256 load/store instructions as requiring avx512vl instead of avx512f. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333446&r1=333445&r2=333446&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 29 11:23:22 2018 @@ -1428,18 +1428,18 @@ TARGET_BUILTIN(__builtin_ia32_psraw512, TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "n", 
"avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") @@ -1484,8 +1484,8 @@ TARGET_BUILTIN(__builtin_ia32_loadss128_ TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "n", "avx512vl") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333497 - [X86] Fix the names of a bunch of icelake intrinsics.
Author: ctopper Date: Tue May 29 20:38:15 2018 New Revision: 333497 URL: http://llvm.org/viewvc/llvm-project?rev=333497&view=rev Log: [X86] Fix the names of a bunch of icelake intrinsics. Mostly this fixes the names of all the 128-bit intrinsics to start with _mm_ instead of _mm128_ as is the convention and what the Intel docs say. This also fixes the name of the bitshuffle intrinsics to say epi64 for 128 and 256 bit versions. Modified: cfe/trunk/lib/Headers/avx512vlbitalgintrin.h cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h cfe/trunk/lib/Headers/avx512vlvnniintrin.h cfe/trunk/test/CodeGen/avx512vlbitalg-builtins.c cfe/trunk/test/CodeGen/avx512vlvbmi2-builtins.c cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c Modified: cfe/trunk/lib/Headers/avx512vlbitalgintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbitalgintrin.h?rev=333497&r1=333496&r2=333497&view=diff == --- cfe/trunk/lib/Headers/avx512vlbitalgintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlbitalgintrin.h Tue May 29 20:38:15 2018 @@ -54,23 +54,23 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_popcnt_epi16(__m128i __A) +_mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, - (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) _mm_popcnt_epi16(__B), (__v8hi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { - return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), __U, __B); } @@ -98,29 +98,29 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, } static __inline__ __m128i __DEFAULT_FN_ATTRS 
-_mm128_popcnt_epi8(__m128i __A) +_mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, - (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) _mm_popcnt_epi8(__B), (__v16qi) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { - return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), __U, __B); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, (__v32qi) __B, @@ -128,15 +128,15 @@ _mm256_mask_bitshuffle_epi32_mask(__mmas } static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { - return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, __A, __B); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, (__v16qi) __B, @@ -144,9 +144,9 @@ _mm128_mask_bitshuffle_epi16_mask(__mmas } static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { - return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + return 
_mm_mask_bitshuffle_epi64_mask((__mmask16) -1, __A, __B); } Modified: cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h?rev=333497&r1=333496&r2=333497&view=diff == --- cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h Tue May 29 20:38:15 2018 @@ -31,13 +31,8 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target_
r333509 - [X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.
Author: ctopper Date: Tue May 29 22:26:04 2018 New Revision: 333509 URL: http://llvm.org/viewvc/llvm-project?rev=333509&view=rev Log: [X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512vlvnniintrin.h cfe/trunk/lib/Headers/avx512vnniintrin.h cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c cfe/trunk/test/CodeGen/avx512vnni-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333509&r1=333508&r2=333509&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 29 22:26:04 2018 @@ -970,30 +970,18 @@ TARGET_BUILTIN(__builtin_ia32_storeapd51 TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f") TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, 
"V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") 
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", "avx512vnni") TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", "avx512vl") Modified: cfe/trunk/lib/Headers/avx512vlvnniintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlvnniintrin.h?rev=333509&r1=333508&r2=333509&view=diff == --- cfe/trunk/lib/Headers/avx512vlvnniintrin.h (original) +++ cfe/trunk/lib/Headers/avx512
r315470 - [X86] Correct type for argument to clflushopt intrinsic.
Author: ctopper Date: Wed Oct 11 09:06:08 2017 New Revision: 315470 URL: http://llvm.org/viewvc/llvm-project?rev=315470&view=rev Log: [X86] Correct type for argument to clflushopt intrinsic. Summary: According to Intel docs this should take void const *. We had char*. The lack of const is the main issue. Reviewers: RKSimon, zvi, igorb Reviewed By: igorb Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38782 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/clflushoptintrin.h Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=315470&r1=315469&r2=315470&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Oct 11 09:06:08 2017 @@ -639,7 +639,7 @@ TARGET_BUILTIN(__builtin_ia32_xsavec, "v TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves") //CLFLUSHOPT -TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt") +TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "", "clflushopt") // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") Modified: cfe/trunk/lib/Headers/clflushoptintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clflushoptintrin.h?rev=315470&r1=315469&r2=315470&view=diff == --- cfe/trunk/lib/Headers/clflushoptintrin.h (original) +++ cfe/trunk/lib/Headers/clflushoptintrin.h Wed Oct 11 09:06:08 2017 @@ -32,7 +32,7 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clflushopt"))) static __inline__ void __DEFAULT_FN_ATTRS -_mm_clflushopt(char * __m) { +_mm_clflushopt(void const * __m) { __builtin_ia32_clflushopt(__m); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315517 - [X86] Add support for 'amdfam17h' to __builtin_cpu_is to match gcc.
Author: ctopper Date: Wed Oct 11 14:42:02 2017 New Revision: 315517 URL: http://llvm.org/viewvc/llvm-project?rev=315517&view=rev Log: [X86] Add support for 'amdfam17h' to __builtin_cpu_is to match gcc. The compiler-rt implementation already supported it, it just wasn't exposed. Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/target-builtin-noerror.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=315517&r1=315516&r2=315517&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Oct 11 14:42:02 2017 @@ -1293,6 +1293,7 @@ bool X86TargetInfo::validateCpuIs(String .Case("amd", true) .Case("amdfam10h", true) .Case("amdfam15h", true) + .Case("amdfam17h", true) .Case("atom", true) .Case("barcelona", true) .Case("bdver1", true) Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315517&r1=315516&r2=315517&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Oct 11 14:42:02 2017 @@ -7501,6 +7501,7 @@ Value *CodeGenFunction::EmitX86CpuIs(Str INTEL_KNL, AMD_BTVER1, AMD_BTVER2, +AMDFAM17H, CPU_SUBTYPE_START, INTEL_COREI7_NEHALEM, INTEL_COREI7_WESTMERE, @@ -7527,6 +7528,7 @@ Value *CodeGenFunction::EmitX86CpuIs(Str .Case("amdfam10", AMDFAM10H) .Case("amdfam15h", AMDFAM15H) .Case("amdfam15", AMDFAM15H) + .Case("amdfam17h", AMDFAM17H) .Case("atom", INTEL_BONNELL) .Case("barcelona", AMDFAM10H_BARCELONA) .Case("bdver1", AMDFAM15H_BDVER1) Modified: cfe/trunk/test/CodeGen/target-builtin-noerror.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-builtin-noerror.c?rev=315517&r1=315516&r2=315517&view=diff == --- cfe/trunk/test/CodeGen/target-builtin-noerror.c (original) +++ cfe/trunk/test/CodeGen/target-builtin-noerror.c Wed Oct 11 14:42:02 2017 @@ 
-81,6 +81,7 @@ void verifycpustrings() { (void)__builtin_cpu_is("amd"); (void)__builtin_cpu_is("amdfam10h"); (void)__builtin_cpu_is("amdfam15h"); + (void)__builtin_cpu_is("amdfam17h"); (void)__builtin_cpu_is("atom"); (void)__builtin_cpu_is("barcelona"); (void)__builtin_cpu_is("bdver1"); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315547 - [X86] Remove a few unnecessary check lines from the predefined-arch-macros test.
Author: ctopper Date: Wed Oct 11 19:06:17 2017 New Revision: 315547 URL: http://llvm.org/viewvc/llvm-project?rev=315547&view=rev Log: [X86] Remove a few unnecessary check lines from the predefined-arch-macros test. These were testing OS macros and clang/llvm macros. Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315547&r1=315546&r2=315547&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Oct 11 19:06:17 2017 @@ -1011,20 +1011,12 @@ // CHECK_GLM_M32: #define __XSAVEOPT__ 1 // CHECK_GLM_M32: #define __XSAVES__ 1 // CHECK_GLM_M32: #define __XSAVE__ 1 -// CHECK_GLM_M32: #define __clang__ 1 // CHECK_GLM_M32: #define __goldmont 1 // CHECK_GLM_M32: #define __goldmont__ 1 // CHECK_GLM_M32: #define __i386 1 // CHECK_GLM_M32: #define __i386__ 1 -// CHECK_GLM_M32: #define __linux 1 -// CHECK_GLM_M32: #define __linux__ 1 -// CHECK_GLM_M32: #define __llvm__ 1 // CHECK_GLM_M32: #define __tune_goldmont__ 1 -// CHECK_GLM_M32: #define __unix 1 -// CHECK_GLM_M32: #define __unix__ 1 // CHECK_GLM_M32: #define i386 1 -// CHECK_GLM_M32: #define linux 1 -// CHECK_GLM_M32: #define unix 1 // // RUN: %clang -march=goldmont -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ @@ -1049,19 +1041,11 @@ // CHECK_GLM_M64: #define __XSAVEOPT__ 1 // CHECK_GLM_M64: #define __XSAVES__ 1 // CHECK_GLM_M64: #define __XSAVE__ 1 -// CHECK_GLM_M64: #define __gnu_linux__ 1 // CHECK_GLM_M64: #define __goldmont 1 // CHECK_GLM_M64: #define __goldmont__ 1 -// CHECK_GLM_M64: #define __linux 1 -// CHECK_GLM_M64: #define __linux__ 1 -// CHECK_GLM_M64: #define __llvm__ 1 // CHECK_GLM_M64: #define __tune_goldmont__ 1 -// CHECK_GLM_M64: #define __unix 1 -// CHECK_GLM_M64: #define __unix__ 1 // CHECK_GLM_M64: #define __x86_64 1 // 
CHECK_GLM_M64: #define __x86_64__ 1 -// CHECK_GLM_M64: #define linux 1 -// CHECK_GLM_M64: #define unix 1 // // RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315594 - [X86] Use -ffreestanding instead of using the mm_malloc.h include guard hack on more of the builtin tests.
Author: ctopper Date: Thu Oct 12 10:21:01 2017 New Revision: 315594 URL: http://llvm.org/viewvc/llvm-project?rev=315594&view=rev Log: [X86] Use -ffreestanding instead of using the mm_malloc.h include guard hack on more of the builtin tests. Modified: cfe/trunk/test/CodeGen/adc-builtins.c cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c cfe/trunk/test/CodeGen/builtin-clflushopt.c cfe/trunk/test/CodeGen/builtin-clzero.c Modified: cfe/trunk/test/CodeGen/adc-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/adc-builtins.c?rev=315594&r1=315593&r2=315594&view=diff == --- cfe/trunk/test/CodeGen/adc-builtins.c (original) +++ cfe/trunk/test/CodeGen/adc-builtins.c Thu Oct 12 10:21:01 2017 @@ -1,6 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s - -#define __MM_MALLOC_H +// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s #include Modified: cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c?rev=315594&r1=315593&r2=315594&view=diff == --- cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c Thu Oct 12 10:21:01 2017 @@ -1,6 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s - -#define __MM_MALLOC_H +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin -target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s #include Modified: cfe/trunk/test/CodeGen/builtin-clflushopt.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clflushopt.c?rev=315594&r1=315593&r2=315594&view=diff == --- cfe/trunk/test/CodeGen/builtin-clflushopt.c (original) +++ cfe/trunk/test/CodeGen/builtin-clflushopt.c Thu Oct 12 10:21:01 2017 @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 %s 
-triple=x86_64-apple-darwin -target-feature +clflushopt -emit-llvm -o - -Wall -Werror | FileCheck %s -#define __MM_MALLOC_H +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin -target-feature +clflushopt -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include -#include void test_mm_clflushopt(char * __m) { //CHECK-LABEL: @test_mm_clflushopt //CHECK: @llvm.x86.clflushopt Modified: cfe/trunk/test/CodeGen/builtin-clzero.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clzero.c?rev=315594&r1=315593&r2=315594&view=diff == --- cfe/trunk/test/CodeGen/builtin-clzero.c (original) +++ cfe/trunk/test/CodeGen/builtin-clzero.c Thu Oct 12 10:21:01 2017 @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clzero -emit-llvm -o - -Wall -Werror | FileCheck %s -#define __MM_MALLOC_H +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin -target-feature +clzero -emit-llvm -o - -Wall -Werror | FileCheck %s #include + void test_mm_clzero(void * __m) { //CHECK-LABEL: @test_mm_clzero //CHECK: @llvm.x86.clzero ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315607 - [X86] Add CLWB intrinsic. clang part
Author: ctopper Date: Thu Oct 12 11:57:15 2017 New Revision: 315607 URL: http://llvm.org/viewvc/llvm-project?rev=315607&view=rev Log: [X86] Add CLWB intrinsic. clang part Reviewers: RKSimon, zvi, igorb Reviewed By: RKSimon Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D38781 Added: cfe/trunk/lib/Headers/clwbintrin.h cfe/trunk/test/CodeGen/builtin-clwb.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/immintrin.h Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=315607&r1=315606&r2=315607&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Oct 12 11:57:15 2017 @@ -641,6 +641,9 @@ TARGET_BUILTIN(__builtin_ia32_xsaves, "v //CLFLUSHOPT TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "", "clflushopt") +//CLWB +TARGET_BUILTIN(__builtin_ia32_clwb, "vvC*", "", "clwb") + // ADX TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx") TARGET_BUILTIN(__builtin_ia32_addcarry_u32, "UcUcUiUiUi*", "", "") Modified: cfe/trunk/lib/Headers/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=315607&r1=315606&r2=315607&view=diff == --- cfe/trunk/lib/Headers/CMakeLists.txt (original) +++ cfe/trunk/lib/Headers/CMakeLists.txt Thu Oct 12 11:57:15 2017 @@ -33,6 +33,7 @@ set(files clzerointrin.h cpuid.h clflushoptintrin.h + clwbintrin.h emmintrin.h f16cintrin.h float.h Added: cfe/trunk/lib/Headers/clwbintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clwbintrin.h?rev=315607&view=auto == --- cfe/trunk/lib/Headers/clwbintrin.h (added) +++ cfe/trunk/lib/Headers/clwbintrin.h Thu Oct 12 11:57:15 2017 @@ -0,0 +1,52 @@ +/*=== clwbintrin.h - CLWB intrinsic === + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this 
software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __CLWBINTRIN_H +#define __CLWBINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("clwb"))) + +/// \brief Writes back to memory the cache line (if modified) that contains the +/// linear address specified in \a __p from any level of the cache hierarchy in +/// the cache coherence domain +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the CLWB instruction. +/// +/// \param __p +///A pointer to the memory location used to identify the cache line to be +///written back. 
+static __inline__ void __DEFAULT_FN_ATTRS +_mm_clwb(void const *__p) { + __builtin_ia32_clwb(__p); +} + +#undef __DEFAULT_FN_ATTRS + +#endif Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=315607&r1=315606&r2=315607&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Thu Oct 12 11:57:15 2017 @@ -58,6 +58,10 @@ #include <clflushoptintrin.h> #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__) +#include <clwbintrin.h> +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__) #include <avxintrin.h> #endif Added: cfe/trunk/test/CodeGen/builtin-clwb.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clwb.c?rev=315607&view=auto ===
r315723 - [X86] Add skeleton support for knm cpu
Author: ctopper Date: Fri Oct 13 11:14:24 2017 New Revision: 315723 URL: http://llvm.org/viewvc/llvm-project?rev=315723&view=rev Log: [X86] Add skeleton support for knm cpu This adds support for the Knights Mill CPU. Preprocessor defines match gcc's implementation. Differential Revision: https://reviews.llvm.org/D38813 Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/lib/Basic/Targets/X86.h cfe/trunk/test/Driver/x86-march.c cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=315723&r1=315722&r2=315723&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Fri Oct 13 11:14:24 2017 @@ -229,6 +229,8 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "cx16", true); break; + case CK_KNM: +// TODO: Add avx5124fmaps/avx5124vnniw. case CK_KNL: setFeatureEnabledImpl(Features, "avx512f", true); setFeatureEnabledImpl(Features, "avx512cd", true); @@ -853,6 +855,8 @@ void X86TargetInfo::getTargetDefines(con case CK_KNL: defineCPUMacros(Builder, "knl"); break; + case CK_KNM: +break; case CK_Lakemont: Builder.defineMacro("__tune_lakemont__"); break; @@ -1553,6 +1557,7 @@ X86TargetInfo::CPUKind X86TargetInfo::ge .Cases("skylake-avx512", "skx", CK_SkylakeServer) .Case("cannonlake", CK_Cannonlake) .Case("knl", CK_KNL) + .Case("knm", CK_KNM) .Case("lakemont", CK_Lakemont) .Case("k6", CK_K6) .Case("k6-2", CK_K6_2) Modified: cfe/trunk/lib/Basic/Targets/X86.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.h?rev=315723&r1=315722&r2=315723&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.h (original) +++ cfe/trunk/lib/Basic/Targets/X86.h Fri Oct 13 11:14:24 2017 @@ -203,6 +203,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetI /// Knights Landing processor. CK_KNL, +/// \name Knights Mill +/// Knights Mill processor. 
+CK_KNM, + /// \name Lakemont /// Lakemont microarchitecture based processors. CK_Lakemont, @@ -321,6 +325,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetI case CK_SkylakeServer: case CK_Cannonlake: case CK_KNL: +case CK_KNM: case CK_K8: case CK_K8SSE3: case CK_AMDFAM10: Modified: cfe/trunk/test/Driver/x86-march.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/x86-march.c?rev=315723&r1=315722&r2=315723&view=diff == --- cfe/trunk/test/Driver/x86-march.c (original) +++ cfe/trunk/test/Driver/x86-march.c Fri Oct 13 11:14:24 2017 @@ -52,6 +52,10 @@ // RUN: | FileCheck %s -check-prefix=knl // knl: "-target-cpu" "knl" // +// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=knm 2>&1 \ +// RUN: | FileCheck %s -check-prefix=knm +// knm: "-target-cpu" "knm" +// // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=cannonlake 2>&1 \ // RUN: | FileCheck %s -check-prefix=cannonlake // cannonlake: "-target-cpu" "cannonlake" Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315723&r1=315722&r2=315723&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Fri Oct 13 11:14:24 2017 @@ -783,6 +783,81 @@ // CHECK_KNL_M64: #define __tune_knl__ 1 // CHECK_KNL_M64: #define __x86_64 1 // CHECK_KNL_M64: #define __x86_64__ 1 + +// RUN: %clang -march=knm -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M32 +// CHECK_KNM_M32: #define __AES__ 1 +// CHECK_KNM_M32: #define __AVX2__ 1 +// CHECK_KNM_M32: #define __AVX512CD__ 1 +// CHECK_KNM_M32: #define __AVX512ER__ 1 +// CHECK_KNM_M32: #define __AVX512F__ 1 +// CHECK_KNM_M32: #define __AVX512PF__ 1 +// CHECK_KNM_M32: #define __AVX__ 1 +// CHECK_KNM_M32: #define __BMI2__ 1 +// CHECK_KNM_M32: #define __BMI__ 1 +// CHECK_KNM_M32: #define 
__F16C__ 1 +// CHECK_KNM_M32: #define __FMA__ 1 +// CHECK_KNM_M32: #define __LZCNT__ 1 +// CHECK_KNM_M32: #define __MMX__ 1 +// CHECK_KNM_M32: #define __PCLMUL__ 1 +// CHECK_KNM_M32: #define __POPCNT__ 1 +// CHECK_KNM_M32: #define __PREFETCHWT1__ 1 +// CHECK_KNM_M32: #define __RDRND__ 1 +// CHECK_KNM_M32: #define __RTM__ 1 +// CHECK_KNM_M32: #define __SSE2__ 1 +// CHECK_KNM_M32: #define __SSE3__ 1 +// CHECK_KNM_M32: #define __SSE4_1__ 1 +// CHECK_KNM_M32: #define __SSE4_2__ 1 +// CHECK_KNM_M32: #define __SSE__ 1 +// CHECK_K
r315729 - [X86] Remove 'knm' defines from predefined-arch-macros.c test.
Author: ctopper Date: Fri Oct 13 11:38:10 2017 New Revision: 315729 URL: http://llvm.org/viewvc/llvm-project?rev=315729&view=rev Log: [X86] Remove 'knm' defines from predefined-arch-macros.c test. Direction seems to be that we don't want to keep adding these, but I forgot to remove them from the test before I committed r315723. Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315729&r1=315728&r2=315729&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Fri Oct 13 11:38:10 2017 @@ -815,9 +815,6 @@ // CHECK_KNM_M32: #define __XSAVE__ 1 // CHECK_KNM_M32: #define __i386 1 // CHECK_KNM_M32: #define __i386__ 1 -// CHECK_KNM_M32: #define __knm 1 -// CHECK_KNM_M32: #define __knm__ 1 -// CHECK_KNM_M32: #define __tune_knm__ 1 // CHECK_KNM_M32: #define i386 1 // RUN: %clang -march=knm -m64 -E -dM %s -o - 2>&1 \ @@ -853,9 +850,6 @@ // CHECK_KNM_M64: #define __XSAVE__ 1 // CHECK_KNM_M64: #define __amd64 1 // CHECK_KNM_M64: #define __amd64__ 1 -// CHECK_KNM_M64: #define __knm 1 -// CHECK_KNM_M64: #define __knm__ 1 -// CHECK_KNM_M64: #define __tune_knm__ 1 // CHECK_KNM_M64: #define __x86_64 1 // CHECK_KNM_M64: #define __x86_64__ 1 // ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340713 - [X86] Don't set min_vector_width to 512 on intrinsics that only operate on k registers.
Author: ctopper Date: Sun Aug 26 22:27:15 2018 New Revision: 340713 URL: http://llvm.org/viewvc/llvm-project?rev=340713&view=rev Log: [X86] Don't set min_vector_width to 512 on intrinsics that only operate on k registers. Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=340713&r1=340712&r2=340713&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Aug 26 22:27:15 2018 @@ -175,6 +175,7 @@ typedef enum /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) /* Create vectors with repeated elements */ @@ -508,13 +509,13 @@ _mm512_castsi512_si256 (__m512i __A) return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a) { return (__mmask16)__a; } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a) { return (int)__a; @@ -4580,7 +4581,7 @@ _mm512_store_epi64 (void *__P, __m512i _ /* Mask ops */ -static __inline __mmask16 __DEFAULT_FN_ATTRS512 +static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); @@ -5622,7 +5623,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m12 (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) { return __A; @@ -8320,49 +8321,49 @@ 
_mm512_mask_permutexvar_epi32 (__m512i _ #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS512 +static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340714 - [X86] Undef __DEFAULT_FN_ATTRS in avx512fintrin.h.
Author: ctopper Date: Sun Aug 26 22:44:45 2018 New Revision: 340714 URL: http://llvm.org/viewvc/llvm-project?rev=340714&view=rev Log: [X86] Undef __DEFAULT_FN_ATTRS in avx512fintrin.h. Fixes test failure after r340713 Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=340714&r1=340713&r2=340714&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Aug 26 22:44:45 2018 @@ -9595,5 +9595,6 @@ _mm512_mask_reduce_min_ps(__mmask16 __M, #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS #endif /* __AVX512FINTRIN_H */ ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340717 - [X86] Rename __DEFAULT_FN_ATTRS to __DEFAULT_FN_ATTRS512 in avx512dqintrin.h and avx512bwintrin.h.
Author: ctopper Date: Sun Aug 26 23:20:19 2018 New Revision: 340717 URL: http://llvm.org/viewvc/llvm-project?rev=340717&view=rev Log: [X86] Rename __DEFAULT_FN_ATTRS to a__DEFAULT_FN_ATTRS512 in avx512dqintrin.h and avx512bwintrin.h. This is preparation for adding removing min_vector_width 512 from some intrinsics. Modified: cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340717&r1=340716&r2=340717&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun Aug 26 23:20:19 2018 @@ -32,7 +32,7 @@ typedef unsigned int __mmask32; typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) /* Integer compare */ @@ -176,102 +176,102 @@ typedef unsigned long long __mmask64; #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A + (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 
(__v64qi)_mm512_add_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi8 (__m512i __A, __m512i __B) { return (__m512i) ((__v64qu) __A - (__v64qu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A + (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi16 (__m512i __A, __m512i __B) { return (__m512i) ((__v32hu) __A - (__v32hu) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i
r340719 - [X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers.
Author: ctopper Date: Sun Aug 26 23:20:22 2018 New Revision: 340719 URL: http://llvm.org/viewvc/llvm-project?rev=340719&view=rev Log: [X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers. This also adds a second intrinsic name for the 16-bit mask versions. These intrinsics match gcc and icc. They just aren't published in the Intel Intrinsics Guide so I only recently found they existed. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340719&r1=340718&r2=340719&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun Aug 26 23:20:22 2018 @@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfd TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf") TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") @@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps5 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") 
TARGET_BUILTIN(__builtin_ia32_fpcla_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340719&r1=340718&r2=340719&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Aug 26 23:20:22 2018 @@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeG } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, - unsigned NumElts, ArrayRef Ops, + ArrayRef Ops, bool InvertLHS = false) { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); @@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X8
r340718 - [X86] Remove min_vector_width 512 from some intrinsics that operate only on k-registers.
Author: ctopper Date: Sun Aug 26 23:20:20 2018 New Revision: 340718 URL: http://llvm.org/viewvc/llvm-project?rev=340718&view=rev Log: [X86] Remove min_vector_width 512 from some intrinsics that operate only on k-registers. Modified: cfe/trunk/lib/Headers/avx512bwintrin.h Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340718&r1=340717&r2=340718&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun Aug 26 23:20:20 2018 @@ -33,6 +33,7 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) /* Integer compare */ @@ -1792,5 +1793,6 @@ _mm512_sad_epu8 (__m512i __A, __m512i __ } #undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS #endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340798 - [X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic names for 16 bit masks.
Author: ctopper Date: Mon Aug 27 23:28:25 2018 New Revision: 340798 URL: http://llvm.org/viewvc/llvm-project?rev=340798&view=rev Log: [X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic names for 16 bit masks. This matches gcc and icc despite not being documented in the Intel Intrinsics Guide. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340798&r1=340797&r2=340798&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Aug 27 23:28:25 2018 @@ -1749,8 +1749,14 @@ TARGET_BUILTIN(__builtin_ia32_korqi, "Uc TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", 
"avx512f") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340798&r1=340797&r2=340798&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Aug 27 23:28:25 2018 @@ -10012,14 +10012,21 @@ Value *CodeGenFunction::EmitX86BuiltinEx return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: - case X86::BI__builtin_ia32_kortestzhi: { + case X86::BI__builtin_ia32_kortestcsi: + case X86::BI__builtin_ia32_kortestcdi: { Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); -Value *C; -if (BuiltinID == X86::BI__builtin_ia32_kortestchi) - C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); -else - C = llvm::Constant::getNullValue(Builder.getInt16Ty()); +Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); +Value *Cmp = Builder.CreateICmpEQ(Or, C); +return Builder.CreateZExt(Cmp, ConvertType(E->getType())); + } + case X86::BI__builtin_ia32_kortestzqi: + case X86::BI__builtin_ia32_kortestzhi: + case X86::BI__builtin_ia32_kortestzsi: + case X86::BI__builtin_ia32_kortestzdi: { +Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); +Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); Value *Cmp = Builder.CreateICmpEQ(Or, C); return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340798&r1=340797&r2=340798&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Mon Aug 27 23:28:25 2018 @@ -107,6 +107,42 @@ _kxor_mask64(__mmask64 __A, __mmask64 __ return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); } +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned 
char)__builtin_ia32_kortestcsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { + *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); + return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); +} + +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) +{ + return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); +} + +st
r340879 - [X86] Add kadd intrinsics to match gcc and icc.
Author: ctopper Date: Tue Aug 28 15:32:14 2018 New Revision: 340879 URL: http://llvm.org/viewvc/llvm-project?rev=340879&view=rev Log: [X86] Add kadd intrinsics to match gcc and icc. This adds the following intrinsics: _kadd_mask64 _kadd_mask32 _kadd_mask16 _kadd_mask8 These are missing from the Intel Intrinsics Guide, but are implemented by both gcc and icc. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340879&r1=340878&r2=340879&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Aug 28 15:32:14 2018 @@ -1737,6 +1737,10 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps5 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_fpcla_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kadddi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340879&r1=340878&r2=340879&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Aug 28 
15:32:14 2018 @@ -8613,7 +8613,7 @@ static Value *EmitX86MaskLogic(CodeGenFu LHS = CGF.Builder.CreateNot(LHS); return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), - CGF.Builder.getIntNTy(std::max(NumElts, 8U))); + Ops[0]->getType()); } static Value *EmitX86Select(CodeGenFunction &CGF, @@ -10031,6 +10031,34 @@ Value *CodeGenFunction::EmitX86BuiltinEx return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } + case X86::BI__builtin_ia32_kaddqi: + case X86::BI__builtin_ia32_kaddhi: + case X86::BI__builtin_ia32_kaddsi: + case X86::BI__builtin_ia32_kadddi: { +Intrinsic::ID IID; +switch (BuiltinID) { +default: llvm_unreachable("Unsupported intrinsic!"); +case X86::BI__builtin_ia32_kaddqi: + IID = Intrinsic::x86_avx512_kadd_b; + break; +case X86::BI__builtin_ia32_kaddhi: + IID = Intrinsic::x86_avx512_kadd_w; + break; +case X86::BI__builtin_ia32_kaddsi: + IID = Intrinsic::x86_avx512_kadd_d; + break; +case X86::BI__builtin_ia32_kadddi: + IID = Intrinsic::x86_avx512_kadd_q; + break; +} + +unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); +Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); +Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); +Function *Intr = CGM.getIntrinsic(IID); +Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); +return Builder.CreateBitCast(Res, Ops[0]->getType()); + } case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340879&r1=340878&r2=340879&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Tue Aug 28 15:32:14 2018 @@ -143,6 +143,18 @@ _kortest_mask64_u8(__mmask64 __A, __mmas return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_kadd_mask32(__mmask32 __A, __mmask32 __B) +{ + return 
(__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_kadd_mask64(__mmask64 __A, __mmask64 __B) +{ + return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); +} + /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) \ Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=340879&r1=340878&r2=340879&view=diff ==
r341234 - [X86] Add kshift intrinsics to match gcc and icc.
Author: ctopper Date: Fri Aug 31 11:22:52 2018 New Revision: 341234 URL: http://llvm.org/viewvc/llvm-project?rev=341234&view=rev Log: [X86] Add kshift intrinsics to match gcc and icc. This adds the following intrinsics: _kshiftli_mask8 _kshiftli_mask16 _kshiftli_mask32 _kshiftli_mask64 _kshiftri_mask8 _kshiftri_mask16 _kshiftri_mask32 _kshiftri_mask64 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341234&r1=341233&r2=341234&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 11:22:52 2018 @@ -1770,6 +1770,14 @@ TARGET_BUILTIN(__builtin_ia32_kxorqi, "U TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "ULLiULLiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") 
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341234&r1=341233&r2=341234&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 11:22:52 2018 @@ -9929,6 +9929,50 @@ Value *CodeGenFunction::EmitX86BuiltinEx "psrldq"); return Builder.CreateBitCast(SV, ResultType, "cast"); } + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: { +unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; +unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + +if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + +Value *In = getMaskVecValue(*this, Ops[0], NumElts); + +uint32_t Indices[64]; +for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = NumElts + i - ShiftVal; + +Value *Zero = llvm::Constant::getNullValue(In->getType()); +Value *SV = Builder.CreateShuffleVector(Zero, In, +makeArrayRef(Indices, NumElts), +"kshiftl"); +return Builder.CreateBitCast(SV, Ops[0]->getType()); + } + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: { +unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; +unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + +if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + +Value *In = getMaskVecValue(*this, Ops[0], NumElts); + +uint32_t Indices[64]; +for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + ShiftVal; + +Value *Zero = llvm::Constant::getNullValue(In->getType()); +Value *SV = Builder.CreateShuffleVector(In, Zero, 
+makeArrayRef(Indices, NumElts), +"kshiftr"); +return Builder.CreateBitCast(SV, Ops[0]->getType()); + } case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341234&r1=341233&r2=341234&view=diff == --- cfe/trunk/lib/Headers
r341251 - [X86] Add k-mask conversion and load/store intrinsics to match gcc and icc.
Author: ctopper Date: Fri Aug 31 13:41:06 2018 New Revision: 341251 URL: http://llvm.org/viewvc/llvm-project?rev=341251&view=rev Log: [X86] Add k-mask conversion and load/store instrinsics to match gcc and icc. This adds: _cvtmask8_u32, _cvtmask16_u32, _cvtmask32_u32, _cvtmask64_u64 _cvtu32_mask8, _cvtu32_mask16, _cvtu32_mask32, _cvtu64_mask64 _load_mask8, _load_mask16, _load_mask32, _load_mask64 _store_mask8, _store_mask16, _store_mask32, _store_mask64 These are currently missing from the Intel Intrinsics Guide webpage. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341251&r1=341250&r2=341251&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 13:41:06 2018 @@ -1778,6 +1778,10 @@ TARGET_BUILTIN(__builtin_ia32_kshiftriqi TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_kmovq, "ULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") 
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341251&r1=341250&r2=341251&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 13:41:06 2018 @@ -10137,6 +10137,17 @@ Value *CodeGenFunction::EmitX86BuiltinEx return Builder.CreateBitCast(Builder.CreateNot(Res), Ops[0]->getType()); } + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: { +// Bitcast to vXi1 type and then back to integer. This gets the mask +// register type into the IR, but might be optimized out depending on +// what's around it. +unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); +Value *Res = getMaskVecValue(*this, Ops[0], NumElts); +return Builder.CreateBitCast(Res, Ops[0]->getType()); + } case X86::BI__builtin_ia32_kunpckdi: case X86::BI__builtin_ia32_kunpcksi: Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341251&r1=341250&r2=341251&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Fri Aug 31 13:41:06 2018 @@ -167,6 +167,46 @@ _kadd_mask64(__mmask64 __A, __mmask64 __ #define _kshiftri_mask64(A, I) \ (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_cvtmask32_u32(__mmask32 __A) { + return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); +} + +static __inline__ unsigned long long __DEFAULT_FN_ATTRS +_cvtmask64_u64(__mmask64 __A) { + return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_cvtu32_mask32(unsigned int __A) { + return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_cvtu64_mask64(unsigned long long __A) { + 
return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_load_mask32(__mmask32 *__A) { + return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_load_mask64(__mmask64 *__A) { + return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_store_mask32(__mmask32 *__A, __mmask32 __B) { + *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_store_mask64(__mmask64 *__A, __mmask64 __B) { + *(__mmask64 *)__A = __builtin_ia32
r341265 - [X86] Add ktest intrinsics to match gcc and icc.
Author: ctopper Date: Fri Aug 31 15:29:56 2018 New Revision: 341265 URL: http://llvm.org/viewvc/llvm-project?rev=341265&view=rev Log: [X86] Add ktest intrinsics to match gcc and icc. These aren't documented in the Intel Intrinsics Guide, but are supported by gcc and icc. Includes these intrinsics: _ktestc_mask8_u8, _ktestz_mask8_u8, _ktest_mask8_u8 _ktestc_mask16_u8, _ktestz_mask16_u8, _ktest_mask16_u8 _ktestc_mask32_u8, _ktestz_mask32_u8, _ktest_mask32_u8 _ktestc_mask64_u8, _ktestz_mask64_u8, _ktest_mask64_u8 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341265&r1=341264&r2=341265&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 15:29:56 2018 @@ -1761,6 +1761,14 @@ TARGET_BUILTIN(__builtin_ia32_kortestcsi TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iULLiULLi", "nc", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") 
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341265&r1=341264&r2=341265&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 15:29:56 2018 @@ -10075,6 +10075,50 @@ Value *CodeGenFunction::EmitX86BuiltinEx return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } + case X86::BI__builtin_ia32_ktestcqi: + case X86::BI__builtin_ia32_ktestzqi: + case X86::BI__builtin_ia32_ktestchi: + case X86::BI__builtin_ia32_ktestzhi: + case X86::BI__builtin_ia32_ktestcsi: + case X86::BI__builtin_ia32_ktestzsi: + case X86::BI__builtin_ia32_ktestcdi: + case X86::BI__builtin_ia32_ktestzdi: { +Intrinsic::ID IID; +switch (BuiltinID) { +default: llvm_unreachable("Unsupported intrinsic!"); +case X86::BI__builtin_ia32_ktestcqi: + IID = Intrinsic::x86_avx512_ktestc_b; + break; +case X86::BI__builtin_ia32_ktestzqi: + IID = Intrinsic::x86_avx512_ktestz_b; + break; +case X86::BI__builtin_ia32_ktestchi: + IID = Intrinsic::x86_avx512_ktestc_w; + break; +case X86::BI__builtin_ia32_ktestzhi: + IID = Intrinsic::x86_avx512_ktestz_w; + break; +case X86::BI__builtin_ia32_ktestcsi: + IID = Intrinsic::x86_avx512_ktestc_d; + break; +case X86::BI__builtin_ia32_ktestzsi: + IID = Intrinsic::x86_avx512_ktestz_d; + break; +case X86::BI__builtin_ia32_ktestcdi: + IID = Intrinsic::x86_avx512_ktestc_q; + break; +case X86::BI__builtin_ia32_ktestzdi: + IID = Intrinsic::x86_avx512_ktestz_q; + break; +} + +unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); +Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); +Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); +Function *Intr = CGM.getIntrinsic(IID); +return Builder.CreateCall(Intr, {LHS, RHS}); + } + case X86::BI__builtin_ia32_kaddqi: case X86::BI__builtin_ia32_kaddhi: case 
X86::BI__builtin_ia32_kaddsi: Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341265&r1=341264&r2=341265&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Fri Aug 31 15:29:56 2018 @@ -143,6 +143,42 @@ _kortest_mask64_u8(__mmask64 __A, __mmas return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); } +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) +{ + return
r341678 - [X86] Modify addcarry/subborrow builtins to emit a 2-result intrinsic and a store instruction.
Author: ctopper Date: Fri Sep 7 09:58:57 2018 New Revision: 341678 URL: http://llvm.org/viewvc/llvm-project?rev=341678&view=rev Log: [X86] Modify addcarry/subborrow builtins to emit an 2 result and intrinsic and an store instruction. This is the clang side of D51769. The llvm intrinsics now return two results instead of using an out parameter. Differential Revision: https://reviews.llvm.org/D51771 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/adc-builtins.c cfe/trunk/test/CodeGen/adx-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341678&r1=341677&r2=341678&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Sep 7 09:58:57 2018 @@ -10405,6 +10405,41 @@ Value *CodeGenFunction::EmitX86BuiltinEx Ops[0]); return Builder.CreateExtractValue(Call, 1); } + case X86::BI__builtin_ia32_addcarryx_u32: + case X86::BI__builtin_ia32_addcarryx_u64: + case X86::BI__builtin_ia32_addcarry_u32: + case X86::BI__builtin_ia32_addcarry_u64: + case X86::BI__builtin_ia32_subborrow_u32: + case X86::BI__builtin_ia32_subborrow_u64: { +Intrinsic::ID IID; +switch (BuiltinID) { +default: llvm_unreachable("Unsupported intrinsic!"); +case X86::BI__builtin_ia32_addcarryx_u32: + IID = Intrinsic::x86_addcarryx_u32; + break; +case X86::BI__builtin_ia32_addcarryx_u64: + IID = Intrinsic::x86_addcarryx_u64; + break; +case X86::BI__builtin_ia32_addcarry_u32: + IID = Intrinsic::x86_addcarry_u32; + break; +case X86::BI__builtin_ia32_addcarry_u64: + IID = Intrinsic::x86_addcarry_u64; + break; +case X86::BI__builtin_ia32_subborrow_u32: + IID = Intrinsic::x86_subborrow_u32; + break; +case X86::BI__builtin_ia32_subborrow_u64: + IID = Intrinsic::x86_subborrow_u64; + break; +} + +Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), + { Ops[0], Ops[1], Ops[2] }); +Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[3]); 
+return Builder.CreateExtractValue(Call, 0); + } case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: Modified: cfe/trunk/test/CodeGen/adc-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/adc-builtins.c?rev=341678&r1=341677&r2=341678&view=diff == --- cfe/trunk/test/CodeGen/adc-builtins.c (original) +++ cfe/trunk/test/CodeGen/adc-builtins.c Fri Sep 7 09:58:57 2018 @@ -5,7 +5,10 @@ unsigned char test_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { // CHECK-LABEL: test_addcarry_u32 -// CHECK: call i8 @llvm.x86.addcarry.u32 +// CHECK: [[ADC:%.*]] = call { i8, i32 } @llvm.x86.addcarry.u32 +// CHECK: [[DATA:%.*]] = extractvalue { i8, i32 } [[ADC]], 1 +// CHECK: store i32 [[DATA]], i32* %{{.*}} +// CHECK: [[CF:%.*]] = extractvalue { i8, i32 } [[ADC]], 0 return _addcarry_u32(__cf, __x, __y, __p); } @@ -13,14 +16,20 @@ unsigned char test_addcarry_u64(unsigned unsigned long long __y, unsigned long long *__p) { // CHECK-LABEL: test_addcarry_u64 -// CHECK: call i8 @llvm.x86.addcarry.u64 +// CHECK: [[ADC:%.*]] = call { i8, i64 } @llvm.x86.addcarry.u64 +// CHECK: [[DATA:%.*]] = extractvalue { i8, i64 } [[ADC]], 1 +// CHECK: store i64 [[DATA]], i64* %{{.*}} +// CHECK: [[CF:%.*]] = extractvalue { i8, i64 } [[ADC]], 0 return _addcarry_u64(__cf, __x, __y, __p); } unsigned char test_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y, unsigned int *__p) { // CHECK-LABEL: test_subborrow_u32 -// CHECK: call i8 @llvm.x86.subborrow.u32 +// CHECK: [[SBB:%.*]] = call { i8, i32 } @llvm.x86.subborrow.u32 +// CHECK: [[DATA:%.*]] = extractvalue { i8, i32 } [[SBB]], 1 +// CHECK: store i32 [[DATA]], i32* %{{.*}} +// CHECK: [[CF:%.*]] = extractvalue { i8, i32 } [[SBB]], 0 return _subborrow_u32(__cf, __x, __y, __p); } @@ -28,6 +37,9 @@ unsigned char test_subborrow_u64(unsigne unsigned long long __y, unsigned long long *__p) { // CHECK-LABEL: test_subborrow_u64 -// CHECK: call 
i8 @llvm.x86.subborrow.u64 +// CHECK: [[SBB:%.*]] = call { i8, i64 } @llvm.x86.subborrow.u64 +// CHECK: [[DATA:%.*]] = extractvalue { i8, i64 } [[SBB]], 1 +// CHECK: store i64 [[DATA]], i64* %{{.*}} +// CHECK: [[CF:%.*]] = extractvalue { i8, i64 } [[SBB]], 0 return _subborrow_u64(__cf, __x, __y, __p); } Modified: cfe/trunk/test/CodeGen/adx-built
r341699 - [X86] Custom emit __builtin_rdtscp so we can emit an explicit store for the out parameter
Author: ctopper Date: Fri Sep 7 12:14:24 2018 New Revision: 341699 URL: http://llvm.org/viewvc/llvm-project?rev=341699&view=rev Log: [X86] Custom emit __builtin_rdtscp so we can emit an explicit store for the out parameter This is the clang side of D51803. The llvm intrinsic now returns two results. So we need to emit an explicit store in IR for the out parameter. This is similar to addcarry/subborrow/rdrand/rdseed. Differential Revision: https://reviews.llvm.org/D51805 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/rd-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341699&r1=341698&r2=341699&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Sep 7 12:14:24 2018 @@ -9158,6 +9158,12 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__rdtsc: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); } + case X86::BI__builtin_ia32_rdtscp: { +Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); +Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[0]); +return Builder.CreateExtractValue(Call, 0); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: Modified: cfe/trunk/test/CodeGen/rd-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/rd-builtins.c?rev=341699&r1=341698&r2=341699&view=diff == --- cfe/trunk/test/CodeGen/rd-builtins.c (original) +++ cfe/trunk/test/CodeGen/rd-builtins.c Fri Sep 7 12:14:24 2018 @@ -14,3 +14,12 @@ int test_rdtsc() { // CHECK: @test_rdtsc // CHECK: call i64 @llvm.x86.rdtsc } + +unsigned long long test_rdtscp(unsigned int *a) { +// CHECK: @test_rdtscp +// CHECK: [[RDTSCP:%.*]] = call { i64, i32 } @llvm.x86.rdtscp +// CHECK: [[TSC_AUX:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 1 +// CHECK: store i32 [[TSC_AUX]], i32* %{{.*}} +// 
CHECK: [[TSC:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 0 + return __rdtscp(a); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r324647 - [X86] Replace kortest intrinsics with native IR.
Author: ctopper Date: Thu Feb 8 12:16:17 2018 New Revision: 324647 URL: http://llvm.org/viewvc/llvm-project?rev=324647&view=rev Log: [X86] Replace kortest intrinsics with native IR. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324647&r1=324646&r2=324647&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Feb 8 12:16:17 2018 @@ -8710,6 +8710,18 @@ Value *CodeGenFunction::EmitX86BuiltinEx return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kortestchi: + case X86::BI__builtin_ia32_kortestzhi: { +Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); +Value *C; +if (BuiltinID == X86::BI__builtin_ia32_kortestchi) + C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); +else + C = llvm::Constant::getNullValue(Builder.getInt16Ty()); +Value *Cmp = Builder.CreateICmpEQ(Or, C); +return Builder.CreateZExt(Cmp, ConvertType(E->getType())); + } + case X86::BI__builtin_ia32_kandhi: return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); case X86::BI__builtin_ia32_kandnhi: Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324647&r1=324646&r2=324647&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Feb 8 12:16:17 2018 @@ -6247,16 +6247,28 @@ __mmask16 test_mm512_kor(__m512i __A, __ __E, __F); } -int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) { +int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: @test_mm512_kortestc - // CHECK: @llvm.x86.avx512.kortestc.w - return _mm512_kortestc(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + 
// CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] + // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 + // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1 + // CHECK: zext i1 [[CMP]] to i32 + return _mm512_kortestc(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)); } -int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) { +int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) { // CHECK-LABEL: @test_mm512_kortestz - // CHECK: @llvm.x86.avx512.kortestz.w - return _mm512_kortestz(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]] + // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16 + // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0 + // CHECK: zext i1 [[CMP]] to i32 + return _mm512_kortestz(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)); } __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r324828 - [X86] Change the signature of the AVX512 packed fp compare intrinsics to return vXi1 mask. Make bitcasts to scalar explicit in IR
Author: ctopper Date: Sat Feb 10 15:34:27 2018 New Revision: 324828 URL: http://llvm.org/viewvc/llvm-project?rev=324828&view=rev Log: [X86] Change the signature of the AVX512 packed fp compare intrinsics to return vXi1 mask. Make bitcasts to scalar explicit in IR Summary: This is the clang equivalent of r324827 Reviewers: zvi, delena, RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D43143 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324828&r1=324827&r2=324828&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Feb 10 15:34:27 2018 @@ -8060,6 +8060,29 @@ static Value *EmitX86Select(CodeGenFunct return CGF.Builder.CreateSelect(Mask, Op0, Op1); } +static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, + unsigned NumElts, Value *MaskIn) { + if (MaskIn) { +const auto *C = dyn_cast(MaskIn); +if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); + } + + if (NumElts < 8) { +uint32_t Indices[8]; +for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; +for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; +Cmp = CGF.Builder.CreateShuffleVector( +Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), +std::max(NumElts, 8U))); +} + static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef Ops) { assert((Ops.size() == 2 || Ops.size() == 4) && @@ -8087,24 +8110,11 @@ static Value *EmitX86MaskedCompare(CodeG Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } - if (Ops.size() == 4) { -const auto *C = dyn_cast(Ops[3]); -if (!C || 
!C->isAllOnesValue()) - Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts)); - } + Value *MaskIn = nullptr; + if (Ops.size() == 4) +MaskIn = Ops[3]; - if (NumElts < 8) { -uint32_t Indices[8]; -for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i; -for (unsigned i = NumElts; i != 8; ++i) - Indices[i] = i % NumElts + NumElts; -Cmp = CGF.Builder.CreateShuffleVector( -Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); - } - return CGF.Builder.CreateBitCast(Cmp, - IntegerType::get(CGF.getLLVMContext(), -std::max(NumElts, 8U))); + return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); } static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { @@ -8882,6 +8892,43 @@ Value *CodeGenFunction::EmitX86BuiltinEx return Builder.CreateExtractValue(Call, 1); } + case X86::BI__builtin_ia32_cmpps128_mask: + case X86::BI__builtin_ia32_cmpps256_mask: + case X86::BI__builtin_ia32_cmpps512_mask: + case X86::BI__builtin_ia32_cmppd128_mask: + case X86::BI__builtin_ia32_cmppd256_mask: + case X86::BI__builtin_ia32_cmppd512_mask: { +unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); +Value *MaskIn = Ops[3]; +Ops.erase(&Ops[3]); + +Intrinsic::ID ID; +switch (BuiltinID) { +default: llvm_unreachable("Unsupported intrinsic!"); +case X86::BI__builtin_ia32_cmpps128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_128; + break; +case X86::BI__builtin_ia32_cmpps256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_256; + break; +case X86::BI__builtin_ia32_cmpps512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_ps_512; + break; +case X86::BI__builtin_ia32_cmppd128_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_128; + break; +case X86::BI__builtin_ia32_cmppd256_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_256; + break; +case X86::BI__builtin_ia32_cmppd512_mask: + ID = Intrinsic::x86_avx512_mask_cmp_pd_512; + break; +} + +Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); +return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); 
+ } + // SSE packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324828&r1=324827&r2=324828&view=diff ==
r324954 - [X86] Reverse the operand order of the implementation of the kunpack builtins.
Author: ctopper Date: Mon Feb 12 14:38:52 2018 New Revision: 324954 URL: http://llvm.org/viewvc/llvm-project?rev=324954&view=rev Log: [X86] Reverse the operand order of the implementation of the kunpack builtins. The second operand needs to be in the lower bits of the concatenation. This matches llvm 5.0, gcc, and icc behavior. Fixes PR36360. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324954&r1=324953&r2=324954&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Feb 12 14:38:52 2018 @@ -8846,7 +8846,8 @@ Value *CodeGenFunction::EmitX86BuiltinEx RHS = Builder.CreateShuffleVector(RHS, RHS, makeArrayRef(Indices, NumElts / 2)); // Concat the vectors. -Value *Res = Builder.CreateShuffleVector(LHS, RHS, +// NOTE: Operands are swapped to match the intrinsic definition. 
+Value *Res = Builder.CreateShuffleVector(RHS, LHS, makeArrayRef(Indices, NumElts)); return Builder.CreateBitCast(Res, Ops[0]->getType()); } Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=324954&r1=324953&r2=324954&view=diff == --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Mon Feb 12 14:38:52 2018 @@ -1632,7 +1632,7 @@ __mmask64 test_mm512_kunpackd(__m512i __ // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <64 x i1> [[LHS]], <64 x i1> [[LHS]], <32 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <64 x i1> [[RHS]], <64 x i1> [[RHS]], <32 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[LHS2]], <32 x i1> [[RHS2]], <64 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[RHS2]], <32 x i1> [[LHS2]], <64 x i32> // CHECK: bitcast <64 x i1> [[CONCAT]] to i64 return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); } @@ -1643,7 +1643,7 @@ __mmask32 test_mm512_kunpackw(__m512i __ // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <32 x i1> [[LHS]], <32 x i1> [[LHS]], <16 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <32 x i1> [[RHS]], <32 x i1> [[RHS]], <16 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[LHS2]], <16 x i1> [[RHS2]], <32 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[RHS2]], <16 x i1> [[LHS2]], <32 x i32> return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324954&r1=324953&r2=324954&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ 
cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Feb 12 14:38:52 2018 @@ -6281,7 +6281,7 @@ __mmask16 test_mm512_kunpackb(__m512i __ // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], <8 x i32> // CHECK: [[RHS2:%.*]] = shufflevector <16 x i1> [[RHS]], <16 x i1> [[RHS]], <8 x i32> - // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[LHS2]], <8 x i1> [[RHS2]], <16 x i32> + // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[RHS2]], <8 x i1> [[LHS2]], <16 x i32> // CHECK: bitcast <16 x i1> [[CONCAT]] to i16 return _mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, __B), _mm512_cmpneq_epu32_mask(__C, __D)), ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r325560 - [X86] Remove mask from 512 bit pmulhrsw/pmulhw/pmulhuw builtins.
Author: ctopper Date: Mon Feb 19 23:28:18 2018 New Revision: 325560 URL: http://llvm.org/viewvc/llvm-project?rev=325560&view=rev Log: [X86] Remove mask from 512 bit pmulhrsw/pmulhw/pmulhuw builtins. We now use a vselect node in IR around an unmasked builtin. This makes it consistent with the 128 and 256 bit versions. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=325560&r1=325559&r2=325560&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Feb 19 23:28:18 2018 @@ -1138,9 +1138,9 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2var TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhuw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhw512_mask, "V32sV32sV32sV32sUi", "", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "", "avx512f") Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=325560&r1=325559&r2=325560&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Mon Feb 19 23:28:18 2018 @@ -1008,87 
+1008,70 @@ _mm512_maskz_permutex2var_epi16 (__mmask } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mulhrs_epi16 (__m512i __A, __m512i __B) +_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, -(__v32hi) __B, -(__v32hi) _mm512_setzero_hi(), -(__mmask32) -1); + return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, -__m512i __B) +_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, -(__v32hi) __B, -(__v32hi) __W, -(__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_mulhrs_epi16(__A, __B), + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A, -(__v32hi) __B, -(__v32hi) _mm512_setzero_hi(), -(__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_mulhrs_epi16(__A, __B), + (__v32hi)_mm512_setzero_hi()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mulhi_epi16 (__m512i __A, __m512i __B) +_mm512_mulhi_epi16(__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), - (__mmask32) -1); + return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A, +_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A, - (__v32hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_mulhi_epi16(__A, __B), + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B) +_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { - return (__m512i
r325655 - [X86] Disable CLWB in Cannon Lake
Author: ctopper Date: Tue Feb 20 16:16:50 2018 New Revision: 325655 URL: http://llvm.org/viewvc/llvm-project?rev=325655&view=rev Log: [X86] Disable CLWB in Cannon Lake Cannon Lake does not support CLWB, therefore it does not include all features listed under SKX. Patch by Gabor Buella Differential Revision: https://reviews.llvm.org/D43459 Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=325655&r1=325654&r2=325655&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Tue Feb 20 16:16:50 2018 @@ -175,7 +175,8 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); setFeatureEnabledImpl(Features, "pku", true); -setFeatureEnabledImpl(Features, "clwb", true); +if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB + setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=325655&r1=325654&r2=325655&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Tue Feb 20 16:16:50 2018 @@ -974,7 +974,7 @@ // CHECK_CNL_M32: #define __BMI2__ 1 // CHECK_CNL_M32: #define __BMI__ 1 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1 -// CHECK_CNL_M32: #define __CLWB__ 1 +// CHECK_CNL_M32-NOT: #define __CLWB__ 1 // CHECK_CNL_M32: #define __F16C__ 1 // CHECK_CNL_M32: #define __FMA__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 @@ -1022,7 +1022,7 @@ // CHECK_CNL_M64: #define __BMI2__ 1 // CHECK_CNL_M64: #define __BMI__ 1 // CHECK_CNL_M64: #define 
__CLFLUSHOPT__ 1 -// CHECK_CNL_M64: #define __CLWB__ 1 +// CHECK_CNL_M64-NOT: #define __CLWB__ 1 // CHECK_CNL_M64: #define __F16C__ 1 // CHECK_CNL_M64: #define __FMA__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r326022 - [X86] Remove __builtin_ia32_permvarsf256_mask and __builtin_ia32_permvarsi256_mask and use the avx2 unmasked versions and a select instead.
Author: ctopper Date: Fri Feb 23 22:46:42 2018 New Revision: 326022 URL: http://llvm.org/viewvc/llvm-project?rev=326022&view=rev Log: [X86] Remove __builtin_ia32_permvarsf256_mask and __builtin_ia32_permvarsi256_mask and use the avx2 unmasked versions and a select instead. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=326022&r1=326021&r2=326022&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Feb 23 22:46:42 2018 @@ -1799,8 +1799,6 @@ TARGET_BUILTIN(__builtin_ia32_permvarhi1 TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, "V8fV8fV8iV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, "V8iV8iV8iV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc","","avx512dq,avx512vl") Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=326022&r1=326021&r2=326022&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Fri Feb 23 22:46:42 2018 @@ -8178,60 +8178,41 @@ _mm256_mask_permutexvar_epi64 (__m256i _ __M); } -static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, - __m256 __Y) -{ - return (__m256) 
__builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, -(__v8sf) __W, -(__mmask8) __U); -} +#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) +_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, -(__v8sf) _mm256_setzero_ps (), -(__mmask8) __U); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, +(__v8sf)_mm256_permutexvar_ps(__X, __Y), +(__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_permutexvar_ps (__m256i __X, __m256 __Y) +_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { - return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, -(__v8si) __X, -(__v8sf) _mm256_undefined_si256 (), -(__mmask8) -1); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, +(__v8sf)_mm256_permutexvar_ps(__X, __Y), +(__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) _mm256_setzero_si256 (), - __M); -} +#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, - __m256i __Y) +_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) __W, - (__mmask8) __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_permutexvar_epi32(__X, __Y), + (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) +_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, 
__m256i __Y) { - return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, - (__v8si) __X, - (__v8si) _mm256_undefined_si256(), - (__mmask8) -1); + return (__m256i)__builtin_ia32_select
r326039 - [X86] Remove some masked cvt builtins that can be replaced with legacy sse/avx builtins and a select.
Author: ctopper Date: Sat Feb 24 10:55:13 2018 New Revision: 326039 URL: http://llvm.org/viewvc/llvm-project?rev=326039&view=rev Log: [X86] Remove some masked cvt builtins that can be replaced with legacy sse/avx buiiltins and a select. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512vl-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=326039&r1=326038&r2=326039&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Feb 24 10:55:13 2018 @@ -1195,26 +1195,15 @@ TARGET_BUILTIN(__builtin_ia32_compressst TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256_mask, "V4fV4dV4fUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd128_mask, "V2dV4fV2dUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd256_mask, "V4dV4fV4dUc", "", "avx512vl") 
TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl") Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=326039&r1=326038&r2=326039&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Sat Feb 24 10:55:13 2018 @@ -1785,32 +1785,30 @@ _mm256_maskz_cvtepi32_pd (__mmask8 __U, static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { - return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, - (__v4sf) __W, - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_cvtepi32_ps(__A), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { - return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_cvtepi32_ps(__A), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, 
__m256i __A) { - return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, - (__v8sf) __W, - (__mmask8) __U); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_cvtepi32_ps(__A), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { - return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, - (__v8sf) - _mm256_setzero_ps
r304326 - [TableGen] Clang changes to support Record::getValueAsString and getValueAsListOfStrings returning StringRef instead of std::string
Author: ctopper Date: Wed May 31 14:01:22 2017 New Revision: 304326 URL: http://llvm.org/viewvc/llvm-project?rev=304326&view=rev Log: [TableGen] Clang changes to support Record::getValueAsString and getValueAsListOfStrings returning StringRef instead of std::string This is the clang version of D33710. Differential Revision: https://reviews.llvm.org/D33711 Modified: cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp cfe/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp cfe/trunk/utils/TableGen/ClangOptionDocEmitter.cpp cfe/trunk/utils/TableGen/ClangSACheckersEmitter.cpp Modified: cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp?rev=304326&r1=304325&r2=304326&view=diff == --- cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp (original) +++ cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp Wed May 31 14:01:22 2017 @@ -718,9 +718,9 @@ namespace { }; // Unique the enums, but maintain the original declaration ordering. - std::vector - uniqueEnumsInOrder(const std::vector &enums) { -std::vector uniques; + std::vector + uniqueEnumsInOrder(const std::vector &enums) { +std::vector uniques; SmallDenseSet unique_set; for (const auto &i : enums) { if (unique_set.insert(i).second) @@ -731,7 +731,8 @@ namespace { class EnumArgument : public Argument { std::string type; -std::vector values, enums, uniques; +std::vector values, enums, uniques; + public: EnumArgument(const Record &Arg, StringRef Attr) : Argument(Arg, Attr), type(Arg.getValueAsString("Type")), @@ -850,7 +851,7 @@ namespace { class VariadicEnumArgument: public VariadicArgument { std::string type, QualifiedTypeName; -std::vector values, enums, uniques; +std::vector values, enums, uniques; protected: void writeValueImpl(raw_ostream &OS) const override { @@ -1591,8 +1592,9 @@ struct AttributeSubjectMatchRule { } std::string getEnumValueName() const { -std::string Result = -"SubjectMatchRule_" + MetaSubject->getValueAsString("Name"); +SmallString<128> 
Result; +Result += "SubjectMatchRule_"; +Result += MetaSubject->getValueAsString("Name"); if (isSubRule()) { Result += "_"; if (isNegatedSubRule()) @@ -1601,7 +1603,7 @@ struct AttributeSubjectMatchRule { } if (isAbstractRule()) Result += "_abstract"; -return Result; +return Result.str(); } std::string getEnumValue() const { return "attr::" + getEnumValueName(); } @@ -2603,7 +2605,7 @@ void EmitClangAttrPCHWrite(RecordKeeper // append a unique suffix to distinguish this set of target checks from other // TargetSpecificAttr records. static void GenerateTargetSpecificAttrChecks(const Record *R, - std::vector &Arches, + std::vector &Arches, std::string &Test, std::string *FnName) { // It is assumed that there will be an llvm::Triple object @@ -2613,8 +2615,9 @@ static void GenerateTargetSpecificAttrCh Test += "("; for (auto I = Arches.begin(), E = Arches.end(); I != E; ++I) { -std::string Part = *I; -Test += "T.getArch() == llvm::Triple::" + Part; +StringRef Part = *I; +Test += "T.getArch() == llvm::Triple::"; +Test += Part; if (I + 1 != E) Test += " || "; if (FnName) @@ -2627,11 +2630,12 @@ static void GenerateTargetSpecificAttrCh // We know that there was at least one arch test, so we need to and in the // OS tests. Test += " && ("; -std::vector OSes = R->getValueAsListOfStrings("OSes"); +std::vector OSes = R->getValueAsListOfStrings("OSes"); for (auto I = OSes.begin(), E = OSes.end(); I != E; ++I) { - std::string Part = *I; + StringRef Part = *I; - Test += "T.getOS() == llvm::Triple::" + Part; + Test += "T.getOS() == llvm::Triple::"; + Test += Part; if (I + 1 != E) Test += " || "; if (FnName) @@ -2643,10 +2647,11 @@ static void GenerateTargetSpecificAttrCh // If one or more CXX ABIs are specified, check those as well. 
if (!R->isValueUnset("CXXABIs")) { Test += " && ("; -std::vector CXXABIs = R->getValueAsListOfStrings("CXXABIs"); +std::vector CXXABIs = R->getValueAsListOfStrings("CXXABIs"); for (auto I = CXXABIs.begin(), E = CXXABIs.end(); I != E; ++I) { - std::string Part = *I; - Test += "Target.getCXXABI().getKind() == TargetCXXABI::" + Part; + StringRef Part = *I; + Test += "Target.getCXXABI().getKind() == TargetCXXABI::"; + Test += Part; if (I + 1 != E) Test += " || "; if (FnName) @@ -2684,7 +2689,7 @@ static void GenerateHasAttrSpellingStrin std::string Test; if (Attr->isSubClassOf
r305439 - [Basic] Use a static_assert instead of using the old array of size -1 trick.
Author: ctopper Date: Wed Jun 14 20:27:58 2017 New Revision: 305439 URL: http://llvm.org/viewvc/llvm-project?rev=305439&view=rev Log: [Basic] Use a static_assert instead of using the old array of size -1 trick. Modified: cfe/trunk/include/clang/Basic/AllDiagnostics.h Modified: cfe/trunk/include/clang/Basic/AllDiagnostics.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AllDiagnostics.h?rev=305439&r1=305438&r2=305439&view=diff == --- cfe/trunk/include/clang/Basic/AllDiagnostics.h (original) +++ cfe/trunk/include/clang/Basic/AllDiagnostics.h Wed Jun 14 20:27:58 2017 @@ -28,7 +28,7 @@ namespace clang { template class StringSizerHelper { - char FIELD_TOO_SMALL[SizeOfStr <= FieldType(~0U) ? 1 : -1]; + static_assert(SizeOfStr <= FieldType(~0U), "Field too small!"); public: enum { Size = SizeOfStr }; }; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r320915 - [X86] Add builtins and tests for 128 and 256 bit vpopcntdq.
Author: ctopper Date: Fri Dec 15 22:02:31 2017 New Revision: 320915 URL: http://llvm.org/viewvc/llvm-project?rev=320915&view=rev Log: [X86] Add builtins and tests for 128 and 256 bit vpopcntdq. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/immintrin.h Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=320915&r1=320914&r2=320915&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Dec 15 22:02:31 2017 @@ -1060,6 +1060,10 @@ TARGET_BUILTIN(__builtin_ia32_vpconflict TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd") TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,avx512vl") TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=320915&r1=320914&r2=320915&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Dec 15 22:02:31 2017 @@ -7955,6 +7955,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntd_128: + case X86::BI__builtin_ia32_vpopcntq_128: + case X86::BI__builtin_ia32_vpopcntd_256: + 
case X86::BI__builtin_ia32_vpopcntq_256: case X86::BI__builtin_ia32_vpopcntd_512: case X86::BI__builtin_ia32_vpopcntq_512: { llvm::Type *ResultType = ConvertType(E->getType()); Modified: cfe/trunk/lib/Headers/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=320915&r1=320914&r2=320915&view=diff == --- cfe/trunk/lib/Headers/CMakeLists.txt (original) +++ cfe/trunk/lib/Headers/CMakeLists.txt Fri Dec 15 22:02:31 2017 @@ -21,6 +21,7 @@ set(files avx512vlcdintrin.h avx512vldqintrin.h avx512vlintrin.h + avx512vpopcntdqvlintrin.h avxintrin.h bmi2intrin.h bmiintrin.h Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=320915&r1=320914&r2=320915&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Fri Dec 15 22:02:31 2017 @@ -154,6 +154,11 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || \ +(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include #endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r320916 - [X86] Add the two files I forgot to commit in r320915.
Author: ctopper Date: Fri Dec 15 22:10:24 2017 New Revision: 320916 URL: http://llvm.org/viewvc/llvm-project?rev=320916&view=rev Log: [X86] Add the two files I forgot to commit in r320915. Added: cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c Added: cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h?rev=320916&view=auto == --- cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h (added) +++ cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h Fri Dec 15 22:10:24 2017 @@ -0,0 +1,99 @@ +/*===- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics + *--=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===---=== + */ +#ifndef __IMMINTRIN_H +#error \ +"Never use directly; include instead." 
+#endif + +#ifndef __AVX512VPOPCNTDQVLINTRIN_H +#define __AVX512VPOPCNTDQVLINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { + return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { + return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { + return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i 
__A) { + return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { + return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); +} + +#undef __DEFAULT_FN_ATTRS + +#endif Added: cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c?rev=320916&view=auto ==
r320919 - [X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR.
Author: ctopper Date: Sat Dec 16 00:26:22 2017 New Revision: 320919 URL: http://llvm.org/viewvc/llvm-project?rev=320919&view=rev Log: [X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=320919&r1=320918&r2=320919&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Dec 16 00:26:22 2017 @@ -7564,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenF return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); } +static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, + unsigned NumElts, SmallVectorImpl &Ops, + bool InvertLHS = false) { + Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); + Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); + + if (InvertLHS) +LHS = CGF.Builder.CreateNot(LHS); + + return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), + CGF.Builder.getIntNTy(std::max(NumElts, 8U))); +} + static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, SmallVectorImpl &Ops, llvm::Type *DstTy, @@ -8217,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kandhi: +return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); + case X86::BI__builtin_ia32_kandnhi: +return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); + case X86::BI__builtin_ia32_korhi: +return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + case X86::BI__builtin_ia32_kxnorhi: +return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); + case X86::BI__builtin_ia32_kxorhi: +return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); + case X86::BI__builtin_ia32_knothi: { +Ops[0] = getMaskVecValue(*this, Ops[0], 16); +return 
Builder.CreateBitCast(Builder.CreateNot(Ops[0]), + Builder.getInt16Ty()); + } + case X86::BI__builtin_ia32_vplzcntd_128_mask: case X86::BI__builtin_ia32_vplzcntd_256_mask: case X86::BI__builtin_ia32_vplzcntd_512_mask: Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=320919&r1=320918&r2=320919&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sat Dec 16 00:26:22 2017 @@ -385,7 +385,9 @@ __m512d test_mm512_set1_pd(double d) __mmask16 test_mm512_knot(__mmask16 a) { // CHECK-LABEL: @test_mm512_knot - // CHECK: @llvm.x86.avx512.knot.w + // CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], + // CHECK: bitcast <16 x i1> [[NOT]] to i16 return _mm512_knot(a); } @@ -6211,22 +6213,38 @@ __m512i test_mm512_mask_permutexvar_epi3 return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); } -__mmask16 test_mm512_kand(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kand - // CHECK: @llvm.x86.avx512.kand.w - return _mm512_kand(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), + __E, __F); } -__mmask16 test_mm512_kandn(__mmask16 __A, __mmask16 __B) { +__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: @test_mm512_kandn - // CHECK: @llvm.x86.avx512.kandn.w - return _mm512_kandn(__A, __B); + // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 
x i1> + // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], + // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]] + // CHECK: bitcast <16 x i1> [[RES]] to i16 + return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B), + _mm512_cmpneq_epu32_mask(__C, __D)), +__E, __F); } -__mmask16 test_mm512_kor(
Re: r320971 - [X86] Use {{.*}} instead of hardcoded %1 in knot test.
Thanks! ~Craig On Mon, Dec 18, 2017 at 3:29 AM, Martin Bohme via cfe-commits < cfe-commits@lists.llvm.org> wrote: > Author: mboehme > Date: Mon Dec 18 03:29:21 2017 > New Revision: 320971 > > URL: http://llvm.org/viewvc/llvm-project?rev=320971&view=rev > Log: > [X86] Use {{.*}} instead of hardcoded %1 in knot test. > > This makes the test more resilient and consistent with the other tests > introduced in r320919. > > Modified: > cfe/trunk/test/CodeGen/avx512f-builtins.c > > Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/ > CodeGen/avx512f-builtins.c?rev=320971&r1=320970&r2=320971&view=diff > > == > --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) > +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Dec 18 03:29:21 2017 > @@ -385,7 +385,7 @@ __m512d test_mm512_set1_pd(double d) > __mmask16 test_mm512_knot(__mmask16 a) > { >// CHECK-LABEL: @test_mm512_knot > - // CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1> > + // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1> >// CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 > true, i1 true, i1 true, i1 true, i1 true, i1 true> >// CHECK: bitcast <16 x i1> [[NOT]] to i16 >return _mm512_knot(a); > > > ___ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits > ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321129 - [X86] Add more CPUID bits to cpuid.h to match gcc and support icelake features.
Author: ctopper Date: Tue Dec 19 16:46:09 2017 New Revision: 321129 URL: http://llvm.org/viewvc/llvm-project?rev=321129&view=rev Log: [X86] Add more CPUID bits to cpuid.h to match gcc and support icelake features. Modified: cfe/trunk/lib/Headers/cpuid.h Modified: cfe/trunk/lib/Headers/cpuid.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cpuid.h?rev=321129&r1=321128&r2=321129&view=diff == --- cfe/trunk/lib/Headers/cpuid.h (original) +++ cfe/trunk/lib/Headers/cpuid.h Tue Dec 19 16:46:09 2017 @@ -173,16 +173,24 @@ #define bit_AVX512VL0x8000 /* Features in %ecx for leaf 7 sub-leaf 0 */ -#define bit_PREFTCHWT1 0x0001 -#define bit_AVX512VBMI 0x0002 -#define bit_PKU 0x0004 -#define bit_OSPKE 0x0010 +#define bit_PREFTCHWT1 0x0001 +#define bit_AVX512VBMI 0x0002 +#define bit_PKU 0x0004 +#define bit_OSPKE0x0010 +#define bit_AVX512VBMI2 0x0040 +#define bit_SHSTK0x0080 +#define bit_GFNI 0x0100 +#define bit_VAES 0x0200 +#define bit_VPCLMULQDQ 0x0400 +#define bit_AVX512VNNI 0x0800 +#define bit_AVX512BITALG 0x1000 #define bit_AVX512VPOPCNTDQ 0x4000 -#define bit_RDPID 0x0040 +#define bit_RDPID0x0040 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x0004 #define bit_AVX5124FMAPS 0x0008 +#define bit_IBT 0x0010 /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT0x0001 @@ -192,6 +200,7 @@ /* Features in %ecx for leaf 0x8001 */ #define bit_LAHF_LM 0x0001 #define bit_ABM 0x0020 +#define bit_LZCNT bit_ABM/* for gcc compat */ #define bit_SSE4a 0x0040 #define bit_PRFCHW 0x0100 #define bit_XOP 0x0800 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321325 - [X86] Allow _mm_prefetch (both the header implementation and the builtin) to accept bit 2 which is supposed to indicate the prefetched addresses will be written to
Author: ctopper Date: Thu Dec 21 15:50:22 2017 New Revision: 321325 URL: http://llvm.org/viewvc/llvm-project?rev=321325&view=rev Log: [X86] Allow _mm_prefetch (both the header implementation and the builtin) to accept bit 2 which is supposed to indicate the prefetched addresses will be written to Add the appropriate _MM_HINT_ET0/ET1 defines to match gcc. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=321325&r1=321324&r2=321325&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Dec 21 15:50:22 2017 @@ -8022,8 +8022,9 @@ Value *CodeGenFunction::EmitX86BuiltinEx default: return nullptr; case X86::BI_mm_prefetch: { Value *Address = Ops[0]; -Value *RW = ConstantInt::get(Int32Ty, 0); -Value *Locality = Ops[1]; +ConstantInt *C = cast(Ops[1]); +Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); +Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); Value *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, Data}); Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=321325&r1=321324&r2=321325&view=diff == --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Thu Dec 21 15:50:22 2017 @@ -2035,9 +2035,11 @@ _mm_storer_ps(float *__p, __m128 __a) _mm_store_ps(__p, __a); } -#define _MM_HINT_T0 3 -#define _MM_HINT_T1 2 -#define _MM_HINT_T2 1 +#define _MM_HINT_ET0 7 +#define _MM_HINT_ET1 6 +#define _MM_HINT_T0 3 +#define _MM_HINT_T1 2 +#define _MM_HINT_T2 1 #define _MM_HINT_NTA 0 #ifndef _MSC_VER @@ -2068,7 +2070,8 @@ _mm_storer_ps(float *__p, __m128 __a) ///be generated. 
\n ///_MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will ///be generated. -#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) +#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \ + ((sel) >> 2) & 1, (sel) & 0x3)) #endif /// \brief Stores a 64-bit integer in the specified aligned memory location. To Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=321325&r1=321324&r2=321325&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Dec 21 15:50:22 2017 @@ -2278,7 +2278,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u default: return false; case X86::BI_mm_prefetch: -i = 1; l = 0; u = 3; +i = 1; l = 0; u = 7; break; case X86::BI__builtin_ia32_sha1rnds4: case X86::BI__builtin_ia32_shuf_f32x4_256_mask: ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321341 - [X86] Add 'prfchw' to the correct CPUs to match the backend.
Author: ctopper Date: Thu Dec 21 20:51:00 2017 New Revision: 321341 URL: http://llvm.org/viewvc/llvm-project?rev=321341&view=rev Log: [X86] Add 'prfchw' to the correct CPUs to match the backend. Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321341&r1=321340&r2=321341&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu Dec 21 20:51:00 2017 @@ -159,6 +159,7 @@ bool X86TargetInfo::initFeatureMap( case CK_Broadwell: setFeatureEnabledImpl(Features, "rdseed", true); setFeatureEnabledImpl(Features, "adx", true); +setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Haswell: setFeatureEnabledImpl(Features, "avx2", true); @@ -224,6 +225,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "aes", true); setFeatureEnabledImpl(Features, "pclmul", true); setFeatureEnabledImpl(Features, "sse4.2", true); +setFeatureEnabledImpl(Features, "prfchw", true); LLVM_FALLTHROUGH; case CK_Bonnell: setFeatureEnabledImpl(Features, "movbe", true); @@ -241,6 +243,7 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512cd", true); setFeatureEnabledImpl(Features, "avx512er", true); setFeatureEnabledImpl(Features, "avx512pf", true); +setFeatureEnabledImpl(Features, "prfchw", true); setFeatureEnabledImpl(Features, "prefetchwt1", true); setFeatureEnabledImpl(Features, "fxsr", true); setFeatureEnabledImpl(Features, "rdseed", true); Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321341&r1=321340&r2=321341&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 21 20:51:00 2017 @@ -589,6 +589,7 @@ // 
CHECK_BROADWELL_M32: #define __MMX__ 1 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1 // CHECK_BROADWELL_M32: #define __POPCNT__ 1 +// CHECK_BROADWELL_M32: #define __PRFCHW__ 1 // CHECK_BROADWELL_M32: #define __RDRND__ 1 // CHECK_BROADWELL_M32: #define __RDSEED__ 1 // CHECK_BROADWELL_M32: #define __SSE2__ 1 @@ -620,6 +621,7 @@ // CHECK_BROADWELL_M64: #define __MMX__ 1 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1 // CHECK_BROADWELL_M64: #define __POPCNT__ 1 +// CHECK_BROADWELL_M64: #define __PRFCHW__ 1 // CHECK_BROADWELL_M64: #define __RDRND__ 1 // CHECK_BROADWELL_M64: #define __RDSEED__ 1 // CHECK_BROADWELL_M64: #define __SSE2_MATH__ 1 @@ -657,6 +659,7 @@ // CHECK_SKL_M32: #define __MPX__ 1 // CHECK_SKL_M32: #define __PCLMUL__ 1 // CHECK_SKL_M32: #define __POPCNT__ 1 +// CHECK_SKL_M32: #define __PRFCHW__ 1 // CHECK_SKL_M32: #define __RDRND__ 1 // CHECK_SKL_M32: #define __RDSEED__ 1 // CHECK_SKL_M32: #define __RTM__ 1 @@ -690,6 +693,7 @@ // CHECK_SKL_M64: #define __MPX__ 1 // CHECK_SKL_M64: #define __PCLMUL__ 1 // CHECK_SKL_M64: #define __POPCNT__ 1 +// CHECK_SKL_M64: #define __PRFCHW__ 1 // CHECK_SKL_M64: #define __RDRND__ 1 // CHECK_SKL_M64: #define __RDSEED__ 1 // CHECK_SKL_M64: #define __RTM__ 1 @@ -730,6 +734,7 @@ // CHECK_KNL_M32: #define __PCLMUL__ 1 // CHECK_KNL_M32: #define __POPCNT__ 1 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1 +// CHECK_KNL_M32: #define __PRFCHW__ 1 // CHECK_KNL_M32: #define __RDRND__ 1 // CHECK_KNL_M32: #define __RTM__ 1 // CHECK_KNL_M32: #define __SSE2__ 1 @@ -766,6 +771,7 @@ // CHECK_KNL_M64: #define __PCLMUL__ 1 // CHECK_KNL_M64: #define __POPCNT__ 1 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1 +// CHECK_KNL_M64: #define __PRFCHW__ 1 // CHECK_KNL_M64: #define __RDRND__ 1 // CHECK_KNL_M64: #define __RTM__ 1 // CHECK_KNL_M64: #define __SSE2_MATH__ 1 @@ -806,6 +812,7 @@ // CHECK_KNM_M32: #define __PCLMUL__ 1 // CHECK_KNM_M32: #define __POPCNT__ 1 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1 +// CHECK_KNM_M32: #define __PRFCHW__ 1 // 
CHECK_KNM_M32: #define __RDRND__ 1 // CHECK_KNM_M32: #define __RTM__ 1 // CHECK_KNM_M32: #define __SSE2__ 1 @@ -840,6 +847,7 @@ // CHECK_KNM_M64: #define __PCLMUL__ 1 // CHECK_KNM_M64: #define __POPCNT__ 1 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1 +// CHECK_KNM_M64: #define __PRFCHW__ 1 // CHECK_KNM_M64: #define __RDRND__ 1 // CHECK_KNM_M64: #define __RTM__ 1 // CHECK_KNM_M64: #define __SSE2_MATH__ 1 @@ -879,6 +887,7 @@ // CHECK_SKX_M32: #define __MPX__ 1 // CHECK_SKX_M32: #define __PCLMUL__ 1 // CHECK_SKX_M32: #define __POPCNT__ 1 +// CHECK_SKX_M32: #define __PRFCHW__ 1 // CHECK_SKX_M32: #define __RDRND__ 1 // CHECK_SKX_M32: #define __RTM__ 1 // CHECK_SKX_M32: #define __SGX__ 1 @@ -9
r321343 - [X86] Add missing check lines for the silvermont cases in predefined-arch-macros.c test.
Author: ctopper Date: Thu Dec 21 21:09:38 2017 New Revision: 321343 URL: http://llvm.org/viewvc/llvm-project?rev=321343&view=rev Log: [X86] Add missing check lines for the silvermont cases in predefined-arch-macros.c test. Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321343&r1=321342&r2=321343&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 21 21:09:38 2017 @@ -1234,7 +1234,11 @@ // RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32 +// CHECK_SLM_M32: #define __AES__ 1 +// CHECK_SLM_M32: #define __FXSR__ 1 // CHECK_SLM_M32: #define __MMX__ 1 +// CHECK_SLM_M32: #define __PCLMUL__ 1 +// CHECK_SLM_M32: #define __POPCNT__ 1 // CHECK_SLM_M32: #define __PRFCHW__ 1 // CHECK_SLM_M32: #define __SSE2__ 1 // CHECK_SLM_M32: #define __SSE3__ 1 @@ -1251,7 +1255,11 @@ // RUN: %clang -march=slm -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64 +// CHECK_SLM_M64: #define __AES__ 1 +// CHECK_SLM_M64: #define __FXSR__ 1 // CHECK_SLM_M64: #define __MMX__ 1 +// CHECK_SLM_M64: #define __PCLMUL__ 1 +// CHECK_SLM_M64: #define __POPCNT__ 1 // CHECK_SLM_M64: #define __PRFCHW__ 1 // CHECK_SLM_M64: #define __SSE2_MATH__ 1 // CHECK_SLM_M64: #define __SSE2__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D41583: [x86][icelake][vaes]
I meant if the command line says “-mvaes -mno-aes” we should make sure to disable vaes On Tue, Dec 26, 2017 at 9:47 AM coby via Phabricator < revi...@reviews.llvm.org> wrote: > coby added inline comments. > > > > Comment at: lib/Basic/Targets/X86.cpp:573 > setMMXLevel(Features, AMD3DNowAthlon, Enabled); >} else if (Name == "aes") { > if (Enabled) > > craig.topper wrote: > > Shouldn't -aes imply -vaes? > how come? perhaps i'm missing here something? > why would the first imply the latter? > following this road an atom z8XXX should be capable of supporting vaes, > for example (where it lacks avx, for example, > https://www.intel.com/content/www/us/en/processors/atom/atom-z8000-datasheet-vol-1.html > ) > also, in that sense, pclmul is implying vpclmulqdq > > > Repository: > rC Clang > > https://reviews.llvm.org/D41583 > > > > -- ~Craig ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321502 - [X86] Enable avx512vpopcntdq and clwb for icelake.
Author: ctopper Date: Wed Dec 27 14:25:59 2017 New Revision: 321502 URL: http://llvm.org/viewvc/llvm-project?rev=321502&view=rev Log: [X86] Enable avx512vpopcntdq and clwb for icelake. Per table 1-1 of the October 2017 edition of Intel® Architecture Instruction Set Extensions and Future Features Programming Reference Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321502&r1=321501&r2=321502&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Dec 27 14:25:59 2017 @@ -132,13 +132,14 @@ bool X86TargetInfo::initFeatureMap( break; case CK_Icelake: -// TODO: Add icelake features here. setFeatureEnabledImpl(Features, "vaes", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); setFeatureEnabledImpl(Features, "avx512bitalg", true); setFeatureEnabledImpl(Features, "avx512vnni", true); setFeatureEnabledImpl(Features, "avx512vbmi2", true); +setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); +setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321502&r1=321501&r2=321502&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:25:59 2017 @@ -1060,10 +1060,12 @@ // CHECK_ICL_M32: #define __AVX512VBMI__ 1 // CHECK_ICL_M32: #define __AVX512VL__ 1 // CHECK_ICL_M32: #define __AVX512VNNI__ 1 +// CHECK_ICL_M32: #define __AVX512VPOPCNTDQ__ 1 // CHECK_ICL_M32: #define __AVX__ 1 // CHECK_ICL_M32: #define __BMI2__ 1 // CHECK_ICL_M32: #define __BMI__ 1 // 
CHECK_ICL_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ICL_M32: #define __CLWB__ 1 // CHECK_ICL_M32: #define __F16C__ 1 // CHECK_ICL_M32: #define __FMA__ 1 // CHECK_ICL_M32: #define __GFNI__ 1 @@ -,10 +1113,12 @@ // CHECK_ICL_M64: #define __AVX512VBMI__ 1 // CHECK_ICL_M64: #define __AVX512VL__ 1 // CHECK_ICL_M64: #define __AVX512VNNI__ 1 +// CHECK_ICL_M64: #define __AVX512VPOPCNTDQ__ 1 // CHECK_ICL_M64: #define __AVX__ 1 // CHECK_ICL_M64: #define __BMI2__ 1 // CHECK_ICL_M64: #define __BMI__ 1 // CHECK_ICL_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ICL_M64: #define __CLWB__ 1 // CHECK_ICL_M64: #define __F16C__ 1 // CHECK_ICL_M64: #define __FMA__ 1 // CHECK_ICL_M64: #define __GFNI__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321503 - [X86] Test that -march=skx enables PKU.
Author: ctopper Date: Wed Dec 27 14:26:00 2017 New Revision: 321503 URL: http://llvm.org/viewvc/llvm-project?rev=321503&view=rev Log: [X86] Test that -march=skx enables PKU. Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321503&r1=321502&r2=321503&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:26:00 2017 @@ -886,6 +886,7 @@ // CHECK_SKX_M32: #define __MMX__ 1 // CHECK_SKX_M32: #define __MPX__ 1 // CHECK_SKX_M32: #define __PCLMUL__ 1 +// CHECK_SKX_M32: #define __PKU__ 1 // CHECK_SKX_M32: #define __POPCNT__ 1 // CHECK_SKX_M32: #define __PRFCHW__ 1 // CHECK_SKX_M32: #define __RDRND__ 1 @@ -929,6 +930,7 @@ // CHECK_SKX_M64: #define __MMX__ 1 // CHECK_SKX_M64: #define __MPX__ 1 // CHECK_SKX_M64: #define __PCLMUL__ 1 +// CHECK_SKX_M64: #define __PKU__ 1 // CHECK_SKX_M64: #define __POPCNT__ 1 // CHECK_SKX_M64: #define __PRFCHW__ 1 // CHECK_SKX_M64: #define __RDRND__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321504 - [X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake.
Author: ctopper Date: Wed Dec 27 14:26:01 2017 New Revision: 321504 URL: http://llvm.org/viewvc/llvm-project?rev=321504&view=rev Log: [X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake. We have cannonlake and icelake inheriting from skylake server in a switch using fallthroughs. But they aren't perfect supersets of skylake server. Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321504&r1=321503&r2=321504&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Dec 27 14:26:01 2017 @@ -152,8 +152,10 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); -setFeatureEnabledImpl(Features, "pku", true); -setFeatureEnabledImpl(Features, "clwb", true); +if (Kind == CK_SkylakeServer) { + setFeatureEnabledImpl(Features, "pku", true); + setFeatureEnabledImpl(Features, "clwb", true); +} LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321504&r1=321503&r2=321504&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:26:01 2017 @@ -972,12 +972,14 @@ // CHECK_CNL_M32: #define __BMI2__ 1 // CHECK_CNL_M32: #define __BMI__ 1 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1 +// CHECK_CNL_M32-NOT: #define __CLWB__ 1 // CHECK_CNL_M32: #define __F16C__ 1 // CHECK_CNL_M32: #define __FMA__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 // CHECK_CNL_M32: #define __MMX__ 1 // CHECK_CNL_M32: #define __MPX__ 1 // 
CHECK_CNL_M32: #define __PCLMUL__ 1 +// CHECK_CNL_M32-NOT: #define __PKU__ 1 // CHECK_CNL_M32: #define __POPCNT__ 1 // CHECK_CNL_M32: #define __PRFCHW__ 1 // CHECK_CNL_M32: #define __RDRND__ 1 @@ -1017,12 +1019,14 @@ // CHECK_CNL_M64: #define __BMI2__ 1 // CHECK_CNL_M64: #define __BMI__ 1 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1 +// CHECK_CNL_M64-NOT: #define __CLWB__ 1 // CHECK_CNL_M64: #define __F16C__ 1 // CHECK_CNL_M64: #define __FMA__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 // CHECK_CNL_M64: #define __MMX__ 1 // CHECK_CNL_M64: #define __MPX__ 1 // CHECK_CNL_M64: #define __PCLMUL__ 1 +// CHECK_CNL_M64-NOT: #define __PKU__ 1 // CHECK_CNL_M64: #define __POPCNT__ 1 // CHECK_CNL_M64: #define __PRFCHW__ 1 // CHECK_CNL_M64: #define __RDRND__ 1 @@ -1075,6 +1079,7 @@ // CHECK_ICL_M32: #define __MMX__ 1 // CHECK_ICL_M32: #define __MPX__ 1 // CHECK_ICL_M32: #define __PCLMUL__ 1 +// CHECK_ICL_M32-NOT: #define __PKU__ 1 // CHECK_ICL_M32: #define __POPCNT__ 1 // CHECK_ICL_M32: #define __PRFCHW__ 1 // CHECK_ICL_M32: #define __RDRND__ 1 @@ -1128,6 +1133,7 @@ // CHECK_ICL_M64: #define __MMX__ 1 // CHECK_ICL_M64: #define __MPX__ 1 // CHECK_ICL_M64: #define __PCLMUL__ 1 +// CHECK_ICL_M64-NOT: #define __PKU__ 1 // CHECK_ICL_M64: #define __POPCNT__ 1 // CHECK_ICL_M64: #define __PRFCHW__ 1 // CHECK_ICL_M64: #define __RDRND__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321547 - Revert r321504 "[X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake."
Author: ctopper Date: Thu Dec 28 22:39:16 2017 New Revision: 321547 URL: http://llvm.org/viewvc/llvm-project?rev=321547&view=rev Log: Revert r321504 "[X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake." I based that commit on what was in Intel's public documentation here https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf Which specifically said CLWB wasn't until Icelake. But I've since cross checked with SDE and it thinks these features exist on CNL and ICL. So now I don't know what to believe. I've added test coverage of the current behavior as part of the revert so at least now have proof of what we're doing. Modified: cfe/trunk/lib/Basic/Targets/X86.cpp cfe/trunk/test/Preprocessor/predefined-arch-macros.c Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321547&r1=321546&r2=321547&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu Dec 28 22:39:16 2017 @@ -139,7 +139,6 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512vnni", true); setFeatureEnabledImpl(Features, "avx512vbmi2", true); setFeatureEnabledImpl(Features, "avx512vpopcntdq", true); -setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_Cannonlake: setFeatureEnabledImpl(Features, "avx512ifma", true); @@ -152,10 +151,8 @@ bool X86TargetInfo::initFeatureMap( setFeatureEnabledImpl(Features, "avx512dq", true); setFeatureEnabledImpl(Features, "avx512bw", true); setFeatureEnabledImpl(Features, "avx512vl", true); -if (Kind == CK_SkylakeServer) { - setFeatureEnabledImpl(Features, "pku", true); - setFeatureEnabledImpl(Features, "clwb", true); -} +setFeatureEnabledImpl(Features, "pku", true); +setFeatureEnabledImpl(Features, "clwb", true); LLVM_FALLTHROUGH; case CK_SkylakeClient: setFeatureEnabledImpl(Features, "xsavec", true); 
Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321547&r1=321546&r2=321547&view=diff == --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original) +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 28 22:39:16 2017 @@ -972,14 +972,14 @@ // CHECK_CNL_M32: #define __BMI2__ 1 // CHECK_CNL_M32: #define __BMI__ 1 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1 -// CHECK_CNL_M32-NOT: #define __CLWB__ 1 +// CHECK_CNL_M32: #define __CLWB__ 1 // CHECK_CNL_M32: #define __F16C__ 1 // CHECK_CNL_M32: #define __FMA__ 1 // CHECK_CNL_M32: #define __LZCNT__ 1 // CHECK_CNL_M32: #define __MMX__ 1 // CHECK_CNL_M32: #define __MPX__ 1 // CHECK_CNL_M32: #define __PCLMUL__ 1 -// CHECK_CNL_M32-NOT: #define __PKU__ 1 +// CHECK_CNL_M32: #define __PKU__ 1 // CHECK_CNL_M32: #define __POPCNT__ 1 // CHECK_CNL_M32: #define __PRFCHW__ 1 // CHECK_CNL_M32: #define __RDRND__ 1 @@ -1019,14 +1019,14 @@ // CHECK_CNL_M64: #define __BMI2__ 1 // CHECK_CNL_M64: #define __BMI__ 1 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1 -// CHECK_CNL_M64-NOT: #define __CLWB__ 1 +// CHECK_CNL_M64: #define __CLWB__ 1 // CHECK_CNL_M64: #define __F16C__ 1 // CHECK_CNL_M64: #define __FMA__ 1 // CHECK_CNL_M64: #define __LZCNT__ 1 // CHECK_CNL_M64: #define __MMX__ 1 // CHECK_CNL_M64: #define __MPX__ 1 // CHECK_CNL_M64: #define __PCLMUL__ 1 -// CHECK_CNL_M64-NOT: #define __PKU__ 1 +// CHECK_CNL_M64: #define __PKU__ 1 // CHECK_CNL_M64: #define __POPCNT__ 1 // CHECK_CNL_M64: #define __PRFCHW__ 1 // CHECK_CNL_M64: #define __RDRND__ 1 @@ -1079,7 +1079,7 @@ // CHECK_ICL_M32: #define __MMX__ 1 // CHECK_ICL_M32: #define __MPX__ 1 // CHECK_ICL_M32: #define __PCLMUL__ 1 -// CHECK_ICL_M32-NOT: #define __PKU__ 1 +// CHECK_ICL_M32: #define __PKU__ 1 // CHECK_ICL_M32: #define __POPCNT__ 1 // CHECK_ICL_M32: #define __PRFCHW__ 1 // CHECK_ICL_M32: #define __RDRND__ 1 @@ -1133,7 +1133,7 @@ // CHECK_ICL_M64: #define 
__MMX__ 1 // CHECK_ICL_M64: #define __MPX__ 1 // CHECK_ICL_M64: #define __PCLMUL__ 1 -// CHECK_ICL_M64-NOT: #define __PKU__ 1 +// CHECK_ICL_M64: #define __PKU__ 1 // CHECK_ICL_M64: #define __POPCNT__ 1 // CHECK_ICL_M64: #define __PRFCHW__ 1 // CHECK_ICL_M64: #define __RDRND__ 1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r321749 - [Docs] Re-generate command line documentation, primarily to get the icelake feature command line options in, but there were a couple other changes too.
Author: ctopper Date: Wed Jan 3 10:29:12 2018 New Revision: 321749 URL: http://llvm.org/viewvc/llvm-project?rev=321749&view=rev Log: [Docs] Re-generate command line documentation, primarily to get the icelake feature command line options in, but there were a couple other changes too. Modified: cfe/trunk/docs/ClangCommandLineReference.rst Modified: cfe/trunk/docs/ClangCommandLineReference.rst URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/ClangCommandLineReference.rst?rev=321749&r1=321748&r2=321749&view=diff == --- cfe/trunk/docs/ClangCommandLineReference.rst (original) +++ cfe/trunk/docs/ClangCommandLineReference.rst Wed Jan 3 10:29:12 2018 @@ -120,6 +120,10 @@ Output path for the plist report .. option:: -compatibility\_version +.. option:: --config + +Specifies configuration file + .. option:: --constant-cfstrings .. option:: -coverage, --coverage @@ -1545,6 +1549,10 @@ Enable ARC-style weak references in Obje OpenMP target code is compiled as relocatable using the -c flag. For OpenMP targets the code is relocatable by default. +.. option:: -fopenmp-simd, -fno-openmp-simd + +Emit OpenMP code only for SIMD-based constructs. + .. option:: -fopenmp-use-tls .. option:: -fopenmp-version= @@ -1998,7 +2006,7 @@ Link stack frames through backchain on S .. option:: -mconsole -.. option:: -mcpu=, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62) +.. option:: -mcpu=, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 (equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 (equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 (equivalent to -mcpu=hexagonv65) .. option:: -mdefault-build-attributes, -mno-default-build-attributes @@ -2328,6 +2336,8 @@ X86 .. option:: -mavx2, -mno-avx2 +.. option:: -mavx512bitalg, -mno-avx512bitalg + .. option:: -mavx512bw, -mno-avx512bw .. 
option:: -mavx512cd, -mno-avx512cd @@ -2344,8 +2354,12 @@ X86 .. option:: -mavx512vbmi, -mno-avx512vbmi +.. option:: -mavx512vbmi2, -mno-avx512vbmi2 + .. option:: -mavx512vl, -mno-avx512vl +.. option:: -mavx512vnni, -mno-avx512vnni + .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq .. option:: -mbmi, -mno-bmi @@ -2370,6 +2384,8 @@ X86 .. option:: -mfxsr, -mno-fxsr +.. option:: -mgfni, -mno-gfni + .. option:: -mibt, -mno-ibt .. option:: -mlwp, -mno-lwp @@ -2424,6 +2440,10 @@ X86 .. option:: -mtbm, -mno-tbm +.. option:: -mvaes, -mno-vaes + +.. option:: -mvpclmulqdq, -mno-vpclmulqdq + .. option:: -mx87, -m80387, -mno-x87 .. option:: -mxop, -mno-xop ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r322038 - [X86] Replace cvt*2mask intrinsics with native IR using 'icmp slt X, zeroinitializer'.
Author: ctopper Date: Mon Jan 8 14:37:56 2018 New Revision: 322038 URL: http://llvm.org/viewvc/llvm-project?rev=322038&view=rev Log: [X86] Replace cvt*2mask intrinsics with native IR using 'icmp slt X, zeroinitializer. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=322038&r1=322037&r2=322038&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jan 8 14:37:56 2018 @@ -7791,7 +7791,9 @@ static Value *EmitX86Select(CodeGenFunct } static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, - bool Signed, SmallVectorImpl &Ops) { + bool Signed, ArrayRef Ops) { + assert((Ops.size() == 2 || Ops.size() == 4) && + "Unexpected number of arguments"); unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); Value *Cmp; @@ -7815,9 +7817,11 @@ static Value *EmitX86MaskedCompare(CodeG Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } - const auto *C = dyn_cast(Ops.back()); - if (!C || !C->isAllOnesValue()) -Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + if (Ops.size() == 4) { +const auto *C = dyn_cast(Ops[3]); +if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts)); + } if (NumElts < 8) { uint32_t Indices[8]; @@ -7833,6 +7837,11 @@ static Value *EmitX86MaskedCompare(CodeG std::max(NumElts, 8U))); } +static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { + Value *Zero = Constant::getNullValue(In->getType()); + return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); +} + static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef Ops) { llvm::Type *Ty = Ops[0]->getType(); @@ -8179,6 +8188,20 @@ Value 
*CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_cvtmask2q512: return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); + case X86::BI__builtin_ia32_cvtb2mask128: + case X86::BI__builtin_ia32_cvtb2mask256: + case X86::BI__builtin_ia32_cvtb2mask512: + case X86::BI__builtin_ia32_cvtw2mask128: + case X86::BI__builtin_ia32_cvtw2mask256: + case X86::BI__builtin_ia32_cvtw2mask512: + case X86::BI__builtin_ia32_cvtd2mask128: + case X86::BI__builtin_ia32_cvtd2mask256: + case X86::BI__builtin_ia32_cvtd2mask512: + case X86::BI__builtin_ia32_cvtq2mask128: + case X86::BI__builtin_ia32_cvtq2mask256: + case X86::BI__builtin_ia32_cvtq2mask512: +return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=322038&r1=322037&r2=322038&view=diff == --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Mon Jan 8 14:37:56 2018 @@ -1743,7 +1743,8 @@ __mmask32 test_mm512_mask_testn_epi16_ma __mmask64 test_mm512_movepi8_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi8_mask - // CHECK: @llvm.x86.avx512.cvtb2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer + // CHECK: bitcast <64 x i1> [[CMP]] to i64 return _mm512_movepi8_mask(__A); } @@ -1941,7 +1942,8 @@ __m512i test_mm512_sad_epu8(__m512i __A, __mmask32 test_mm512_movepi16_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi16_mask - // CHECK: @llvm.x86.avx512.cvtw2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer + // CHECK: bitcast <32 x i1> [[CMP]] to i32 return _mm512_movepi16_mask(__A); } Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=322038&r1=322037&r2=322038&view=diff == --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Mon Jan 8 14:37:56 2018 @@ -923,7 +923,8 @@ __m128d test_mm_maskz_reduce_round_sd(__ __mmask16 test_mm512_movepi32_mask(__m512i __A) { // CHECK-LABEL: @test_mm512_movepi32_mask - // CHECK: @llvm.x86.avx512.cvtd2mask.512 + // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer + // CHECK: bitcast <16 x i1> [[CMP]] to i16 return
r322244 - [X86][Sema] Remove constant range checks on builtins that take a char.
Author: ctopper Date: Wed Jan 10 17:37:57 2018 New Revision: 322244 URL: http://llvm.org/viewvc/llvm-project?rev=322244&view=rev Log: [X86][Sema] Remove constant range checks on on builtins that take a char. The constant is already reduced to 8-bits by the time we get here and the checks were just ensuring that it was 8 bits. Thus I don't think there's anyway for them to fail. Modified: cfe/trunk/lib/Sema/SemaChecking.cpp Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=322244&r1=322243&r2=322244&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jan 10 17:37:57 2018 @@ -2361,13 +2361,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_cmpss_mask: i = 2; l = 0; u = 31; break; - case X86::BI__builtin_ia32_xabort: -i = 0; l = -128; u = 255; -break; - case X86::BI__builtin_ia32_pshufw: - case X86::BI__builtin_ia32_aeskeygenassist128: -i = 1; l = -128; u = 255; -break; case X86::BI__builtin_ia32_vcvtps2ph: case X86::BI__builtin_ia32_vcvtps2ph_mask: case X86::BI__builtin_ia32_vcvtps2ph256: @@ -2405,27 +2398,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_fpcla_mask: i = 1; l = 0; u = 255; break; - case X86::BI__builtin_ia32_palignr: - case X86::BI__builtin_ia32_insertps128: - case X86::BI__builtin_ia32_dpps: - case X86::BI__builtin_ia32_dppd: - case X86::BI__builtin_ia32_dpps256: - case X86::BI__builtin_ia32_mpsadbw128: - case X86::BI__builtin_ia32_mpsadbw256: - case X86::BI__builtin_ia32_pcmpistrm128: - case X86::BI__builtin_ia32_pcmpistri128: - case X86::BI__builtin_ia32_pcmpistria128: - case X86::BI__builtin_ia32_pcmpistric128: - case X86::BI__builtin_ia32_pcmpistrio128: - case X86::BI__builtin_ia32_pcmpistris128: - case X86::BI__builtin_ia32_pcmpistriz128: - case X86::BI__builtin_ia32_pclmulqdq128: - case X86::BI__builtin_ia32_vperm2f128_pd256: - case 
X86::BI__builtin_ia32_vperm2f128_ps256: - case X86::BI__builtin_ia32_vperm2f128_si256: - case X86::BI__builtin_ia32_permti256: -i = 2; l = -128; u = 255; -break; case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512_mask: @@ -2480,15 +2452,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_scatterpfqps: i = 4; l = 2; u = 3; break; - case X86::BI__builtin_ia32_pcmpestrm128: - case X86::BI__builtin_ia32_pcmpestri128: - case X86::BI__builtin_ia32_pcmpestria128: - case X86::BI__builtin_ia32_pcmpestric128: - case X86::BI__builtin_ia32_pcmpestrio128: - case X86::BI__builtin_ia32_pcmpestris128: - case X86::BI__builtin_ia32_pcmpestriz128: -i = 4; l = -128; u = 255; -break; case X86::BI__builtin_ia32_rndscalesd_round_mask: case X86::BI__builtin_ia32_rndscaless_round_mask: i = 4; l = 0; u = 255; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r322245 - [X86] Make -mavx512f imply -mfma and -mf16c in the frontend like it does in the backend.
Author: ctopper Date: Wed Jan 10 17:37:59 2018 New Revision: 322245 URL: http://llvm.org/viewvc/llvm-project?rev=322245&view=rev Log: [X86] Make -mavx512f imply -mfma and -mf16c in the frontend like it does in the backend. Similarly, make -mno-fma and -mno-f16c imply -mno-avx512f. Without this "-mno-sse -mavx512f" ends up with avx512f being enabled in the frontend but disabled in the backend. Modified: cfe/trunk/lib/Basic/Targets/X86.cpp Modified: cfe/trunk/lib/Basic/Targets/X86.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=322245&r1=322244&r2=322245&view=diff == --- cfe/trunk/lib/Basic/Targets/X86.cpp (original) +++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Jan 10 17:37:59 2018 @@ -430,7 +430,7 @@ void X86TargetInfo::setSSELevel(llvm::St if (Enabled) { switch (Level) { case AVX512F: - Features["avx512f"] = true; + Features["avx512f"] = Features["fma"] = Features["f16c"] = true; LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = true; @@ -644,6 +644,8 @@ void X86TargetInfo::setFeatureEnabledImp } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); +else + setSSELevel(Features, AVX512F, Enabled); } else if (Name == "fma4") { setXOPLevel(Features, FMA4, Enabled); } else if (Name == "xop") { @@ -653,6 +655,8 @@ void X86TargetInfo::setFeatureEnabledImp } else if (Name == "f16c") { if (Enabled) setSSELevel(Features, AVX, Enabled); +else + setSSELevel(Features, AVX512F, Enabled); } else if (Name == "sha") { if (Enabled) setSSELevel(Features, SSE2, Enabled); ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r322247 - [X86][Sema] Range check the constant argument for the vpshld/vpshrd builtins to ensure it fits in 8-bits.
Author: ctopper Date: Wed Jan 10 17:38:02 2018 New Revision: 322247 URL: http://llvm.org/viewvc/llvm-project?rev=322247&view=rev Log: [X86][Sema] Range check the constant argument for the vpshld/vpshrd builtins to ensure it fits in 8-bits. Modified: cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/Sema/builtins-x86.c Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=322247&r1=322246&r2=322247&view=diff == --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jan 10 17:38:02 2018 @@ -2410,6 +2410,24 @@ bool Sema::CheckX86BuiltinFunctionCall(u case X86::BI__builtin_ia32_dbpsadbw128_mask: case X86::BI__builtin_ia32_dbpsadbw256_mask: case X86::BI__builtin_ia32_dbpsadbw512_mask: + case X86::BI__builtin_ia32_vpshldd128_mask: + case X86::BI__builtin_ia32_vpshldd256_mask: + case X86::BI__builtin_ia32_vpshldd512_mask: + case X86::BI__builtin_ia32_vpshldq128_mask: + case X86::BI__builtin_ia32_vpshldq256_mask: + case X86::BI__builtin_ia32_vpshldq512_mask: + case X86::BI__builtin_ia32_vpshldw128_mask: + case X86::BI__builtin_ia32_vpshldw256_mask: + case X86::BI__builtin_ia32_vpshldw512_mask: + case X86::BI__builtin_ia32_vpshrdd128_mask: + case X86::BI__builtin_ia32_vpshrdd256_mask: + case X86::BI__builtin_ia32_vpshrdd512_mask: + case X86::BI__builtin_ia32_vpshrdq128_mask: + case X86::BI__builtin_ia32_vpshrdq256_mask: + case X86::BI__builtin_ia32_vpshrdq512_mask: + case X86::BI__builtin_ia32_vpshrdw128_mask: + case X86::BI__builtin_ia32_vpshrdw256_mask: + case X86::BI__builtin_ia32_vpshrdw512_mask: i = 2; l = 0; u = 255; break; case X86::BI__builtin_ia32_fixupimmpd512_mask: Modified: cfe/trunk/test/Sema/builtins-x86.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/builtins-x86.c?rev=322247&r1=322246&r2=322247&view=diff == --- cfe/trunk/test/Sema/builtins-x86.c (original) +++ cfe/trunk/test/Sema/builtins-x86.c Wed Jan 10 17:38:02 2018 @@ -4,12 
+4,17 @@ typedef long long __m128i __attribute__( typedef float __m128 __attribute__((__vector_size__(16))); typedef double __m128d __attribute__((__vector_size__(16))); +typedef long long __m256i __attribute__((__vector_size__(32))); +typedef float __m256 __attribute__((__vector_size__(32))); +typedef double __m256d __attribute__((__vector_size__(32))); + typedef long long __m512i __attribute__((__vector_size__(64))); typedef float __m512 __attribute__((__vector_size__(64))); typedef double __m512d __attribute__((__vector_size__(64))); typedef unsigned char __mmask8; typedef unsigned short __mmask16; +typedef unsigned int __mmask32; __m128 test__builtin_ia32_cmpps(__m128 __a, __m128 __b) { __builtin_ia32_cmpps(__a, __b, 32); // expected-error {{argument should be a value from 0 to 31}} @@ -83,3 +88,74 @@ __m512 _mm512_mask_prefetch_i32gather_ps return __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument should be a value from 2 to 3}} } +__m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + return __builtin_ia32_vpshldq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + return __builtin_ia32_vpshldd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + return __builtin_ia32_vpshldw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { + return __builtin_ia32_vpshrdq512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { + return 
__builtin_ia32_vpshrdd512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { + return __builtin_ia32_vpshrdw512_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { + return __builtin_ia32_vpshldq256_mask(__A, __B, 1024, __S, __U); // expected-error {{argument should be a value from 0 to 255}} +} + +__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { + return __builtin_ia32_vpshldq128_mask(__A, __B, 1024, __S, __U); // expected-er
r322246 - [X86] Fix vpshrd builtins to require an ICE for their constant argument to match vpshld.
Author: ctopper Date: Wed Jan 10 17:38:00 2018 New Revision: 322246 URL: http://llvm.org/viewvc/llvm-project?rev=322246&view=rev Log: [X86] Fix vpshrd builtins to require an ICE for their constant argument to match vpshld. Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=322246&r1=322245&r2=322246&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jan 10 17:38:00 2018 @@ -1357,15 +1357,15 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw12 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "", 
"avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r326807 - [X86] Fix typo in cpuid.h, bit_AVX51SER->bit_AVX512ER.
Author: ctopper Date: Tue Mar 6 08:06:44 2018 New Revision: 326807 URL: http://llvm.org/viewvc/llvm-project?rev=326807&view=rev Log: [X86] Fix typo in cpuid.h, bit_AVX51SER->bit_AVX512ER. Modified: cfe/trunk/lib/Headers/cpuid.h Modified: cfe/trunk/lib/Headers/cpuid.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cpuid.h?rev=326807&r1=326806&r2=326807&view=diff == --- cfe/trunk/lib/Headers/cpuid.h (original) +++ cfe/trunk/lib/Headers/cpuid.h Tue Mar 6 08:06:44 2018 @@ -166,7 +166,7 @@ #define bit_CLFLUSHOPT 0x0080 #define bit_CLWB0x0100 #define bit_AVX512PF0x0400 -#define bit_AVX51SER0x0800 +#define bit_AVX512ER0x0800 #define bit_AVX512CD0x1000 #define bit_SHA 0x2000 #define bit_AVX512BW0x4000 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333563 - [X86] Remove 'return' from a bunch of intrinsics that return void and use a builtin that returns void.
Author: ctopper Date: Wed May 30 10:23:45 2018 New Revision: 333563 URL: http://llvm.org/viewvc/llvm-project?rev=333563&view=rev Log: [X86] Remove 'return' from a bunch of intrinsics that return void and use a builtin that returns void. Found by running the intrinsic headers through -pedantic -ansi. Modified: cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/fxsrintrin.h cfe/trunk/lib/Headers/ia32intrin.h cfe/trunk/lib/Headers/immintrin.h cfe/trunk/lib/Headers/pkuintrin.h cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/lib/Headers/xsaveintrin.h cfe/trunk/lib/Headers/xsaveoptintrin.h Modified: cfe/trunk/lib/Headers/avx512vlintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=333563&r1=333562&r2=333563&view=diff == --- cfe/trunk/lib/Headers/avx512vlintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vlintrin.h Wed May 30 10:23:45 2018 @@ -7432,7 +7432,7 @@ _mm256_maskz_cvtusepi64_epi16 (__mmask8 static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { - return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); + __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } static __inline__ __m128i __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/emmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=333563&r1=333562&r2=333563&view=diff == --- cfe/trunk/lib/Headers/emmintrin.h (original) +++ cfe/trunk/lib/Headers/emmintrin.h Wed May 30 10:23:45 2018 @@ -1979,7 +1979,7 @@ _mm_store1_pd(double *__dp, __m128d __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a) { - return _mm_store1_pd(__dp, __a); + _mm_store1_pd(__dp, __a); } /// Stores a 128-bit vector of [2 x double] into an unaligned memory Modified: cfe/trunk/lib/Headers/fxsrintrin.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fxsrintrin.h?rev=333563&r1=333562&r2=333563&view=diff == --- cfe/trunk/lib/Headers/fxsrintrin.h (original) +++ cfe/trunk/lib/Headers/fxsrintrin.h Wed May 30 10:23:45 2018 @@ -43,7 +43,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _fxsave(void *__p) { - return __builtin_ia32_fxsave(__p); + __builtin_ia32_fxsave(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte @@ -61,7 +61,7 @@ _fxsave(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxrstor(void *__p) { - return __builtin_ia32_fxrstor(__p); + __builtin_ia32_fxrstor(__p); } #ifdef __x86_64__ @@ -78,7 +78,7 @@ _fxrstor(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxsave64(void *__p) { - return __builtin_ia32_fxsave64(__p); + __builtin_ia32_fxsave64(__p); } /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte @@ -96,7 +96,7 @@ _fxsave64(void *__p) static __inline__ void __DEFAULT_FN_ATTRS _fxrstor64(void *__p) { - return __builtin_ia32_fxrstor64(__p); + __builtin_ia32_fxrstor64(__p); } #endif Modified: cfe/trunk/lib/Headers/ia32intrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/ia32intrin.h?rev=333563&r1=333562&r2=333563&view=diff == --- cfe/trunk/lib/Headers/ia32intrin.h (original) +++ cfe/trunk/lib/Headers/ia32intrin.h Wed May 30 10:23:45 2018 @@ -72,7 +72,7 @@ __rdtscp(unsigned int *__A) { static __inline__ void __attribute__((__always_inline__, __nodebug__)) _wbinvd(void) { - return __builtin_ia32_wbinvd(); + __builtin_ia32_wbinvd(); } #endif /* __IA32INTRIN_H */ Modified: cfe/trunk/lib/Headers/immintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333563&r1=333562&r2=333563&view=diff == --- cfe/trunk/lib/Headers/immintrin.h (original) +++ cfe/trunk/lib/Headers/immintrin.h Wed May 30 10:23:45 2018 @@ -282,25 +282,25 @@ _readgsbase_u64(void) static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) 
_writefsbase_u32(unsigned int __V) { - return __builtin_ia32_wrfsbase32(__V); + __builtin_ia32_wrfsbase32(__V); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writefsbase_u64(unsigned long long __V) { - return __builtin_ia32_wrfsbase64(__V); + __builtin_ia32_wrfsbase64(__V); } static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) _writegsbase_u32(unsigned int __V) { - return __builtin_ia32_wrgsbase32(__V); + __builtin_ia32_wrgsbase32(__V); } static __inline__ void _
r333568 - [X86] Reduce the number of setzero intrinsics to just the set defined by the Intel Intrinsics Guide.
Author: ctopper Date: Wed May 30 11:02:11 2018 New Revision: 333568 URL: http://llvm.org/viewvc/llvm-project?rev=333568&view=rev Log: [X86] Reduce the number of setzero intrinsics to just the set defined by the Intel Intrinsics Guide. We had quite a few for different element sizes of integers sometimes with strange target features attached to them. We only need a single version for each of _m128i, _m256i, and _m512i with the target feature that first introduced those types. Modified: cfe/trunk/lib/Headers/avx512bitalgintrin.h cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vbmi2intrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlcdintrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/lib/Headers/gfniintrin.h cfe/trunk/lib/Headers/mmintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/Headers/x86intrin-2.c Modified: cfe/trunk/lib/Headers/avx512bitalgintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bitalgintrin.h?rev=333568&r1=333567&r2=333568&view=diff == --- cfe/trunk/lib/Headers/avx512bitalgintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bitalgintrin.h Wed May 30 11:02:11 2018 @@ -48,7 +48,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { - return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), + return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), __U, __B); } @@ -70,7 +70,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __m static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { - return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), + return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(), __U, __B); } Modified: cfe/trunk/lib/Headers/avx512bwintrin.h URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=333568&r1=333567&r2=333568&view=diff == --- cfe/trunk/lib/Headers/avx512bwintrin.h (original) +++ cfe/trunk/lib/Headers/avx512bwintrin.h Wed May 30 11:02:11 2018 @@ -34,26 +34,6 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_qi(void) { - return (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_hi(void) { - return (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ @@ -212,7 +192,7 @@ static __inline__ __m512i __DEFAULT_FN_A _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -231,7 +211,7 @@ static __inline__ __m512i __DEFAULT_FN_A _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -250,7 +230,7 @@ static __inline__ __m512i __DEFAULT_FN_A _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + 
(__v32hi)_mm512_setzer
r333572 - [X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.
Author: ctopper Date: Wed May 30 11:27:07 2018 New Revision: 333572 URL: http://llvm.org/viewvc/llvm-project?rev=333572&view=rev Log: [X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss. We don't need the insertion back into the original vector at the end. The builtin already understands that. This is different than _mm_sqrt_sd which takes two arguments and we do need to insert. Modified: cfe/trunk/lib/Headers/xmmintrin.h cfe/trunk/test/CodeGen/sse-builtins.c Modified: cfe/trunk/lib/Headers/xmmintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=333572&r1=333571&r2=333572&view=diff == --- cfe/trunk/lib/Headers/xmmintrin.h (original) +++ cfe/trunk/lib/Headers/xmmintrin.h Wed May 30 11:27:07 2018 @@ -224,8 +224,7 @@ _mm_div_ps(__m128 __a, __m128 __b) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_sqrtss((__v4sf)__a); } /// Calculates the square roots of the values stored in a 128-bit vector @@ -260,8 +259,7 @@ _mm_sqrt_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rcpss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return (__m128)__builtin_ia32_rcpss((__v4sf)__a); } /// Calculates the approximate reciprocals of the values stored in a @@ -278,7 +276,7 @@ _mm_rcp_ss(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a) { - return __builtin_ia32_rcpps((__v4sf)__a); + return (__m128)__builtin_ia32_rcpps((__v4sf)__a); } /// Calculates the approximate reciprocal of the square root of the value @@ -297,8 +295,7 @@ _mm_rcp_ps(__m128 __a) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a) { - __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a); - return (__m128) { __c[0], __a[1], __a[2], __a[3] }; + return 
__builtin_ia32_rsqrtss((__v4sf)__a); } /// Calculates the approximate reciprocals of the square roots of the Modified: cfe/trunk/test/CodeGen/sse-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=333572&r1=333571&r2=333572&view=diff == --- cfe/trunk/test/CodeGen/sse-builtins.c (original) +++ cfe/trunk/test/CodeGen/sse-builtins.c Wed May 30 11:27:07 2018 @@ -508,14 +508,6 @@ __m128 test_mm_rcp_ps(__m128 x) { __m128 test_mm_rcp_ss(__m128 x) { // CHECK-LABEL: test_mm_rcp_ss // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}}) - // CHECK: extractelement <4 x float> {{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - // CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_rcp_ss(x); } @@ -528,14 +520,6 @@ __m128 test_mm_rsqrt_ps(__m128 x) { __m128 test_mm_rsqrt_ss(__m128 x) { // CHECK-LABEL: test_mm_rsqrt_ss // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}}) - // CHECK: extractelement <4 x float> {{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - // CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_rsqrt_ss(x); } @@ -662,14 +646,6 @@ __m128 test_mm_sqrt_ps(__m128 x) { __m128 test_sqrt_ss(__m128 x) { // CHECK: define {{.*}} @test_sqrt_ss // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss - // CHECK: extractelement <4 x float> 
{{.*}}, i32 0 - // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0 - // CHECK: extractelement <4 x float> {{.*}}, i32 1 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1 - // CHECK: extractelement <4 x float> {{.*}}, i32 2 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2 - // CHECK: extractelement <4 x float> {{.*}}, i32 3 - // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_sqrt_ss(x); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333593 - [X86] Add __extension__ to a bunch of places in our intrinsic headers that fail if you run it through -pedantic -ansi.
Author: ctopper Date: Wed May 30 14:08:27 2018 New Revision: 333593 URL: http://llvm.org/viewvc/llvm-project?rev=333593&view=rev Log: [X86] Add __extension__ to a bunch of places in our intrinsic headers that fail if you run it through -pedantic -ansi. All of these are lines that create a 'compound literal' to concatenate elements together. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avxintrin.h cfe/trunk/lib/Headers/emmintrin.h cfe/trunk/lib/Headers/xmmintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333593&r1=333592&r2=333593&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 14:08:27 2018 @@ -180,7 +180,7 @@ typedef enum static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void) { - return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; + return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; } #define _mm512_setzero_epi32 _mm512_setzero_si512 @@ -262,8 +262,8 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void) { - return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } #define _mm512_setzero _mm512_setzero_ps @@ -271,49 +271,52 @@ _mm512_setzero_ps(void) static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void) { - return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } static __inline __m512 __DEFAULT_FN_ATTRS _mm512_set1_ps(float __w) { - return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, + __w, __w, __w, __w, __w, __w, __w, __w }; 
} static __inline __m512d __DEFAULT_FN_ATTRS _mm512_set1_pd(double __w) { - return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi8(char __w) { - return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512i)(__v64qi){ +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi16(short __w) { - return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w, - __w, __w, __w, __w, __w, __w, __w, __w }; + return __extension__ (__m512i)(__v32hi){ +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w, +__w, __w, __w, __w, __w, __w, __w, __w }; } static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi32(int __s) { - return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, - __s, __s, __s, __s, __s, __s, __s, __s }; + return __extension__ (__m512i)(__v16si){ +__s, __s, __s, __s, __s, __s, __s, __s, +__s, __s, __s, __s, __s, __s, __s, __s }; } static __inline __m512i __DEFAULT_FN_ATTRS @@ -327,7 +330,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, i static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set1_epi64(long long 
__d) { - return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; + return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } static __inline __m512i __DEFAULT_FN_ATTRS @@ -349,7 +352,7 @@ _mm512_broadcastss_ps(__m128 __A) static __inline __m512i __DEFAULT_FN_ATTRS _mm512_set4_epi32 (int __A, int __B, int __C, int __D
r333603 - [X86] Use C style comments in intrinsic headers for overall consistency.
Author: ctopper Date: Wed May 30 15:33:21 2018 New Revision: 333603 URL: http://llvm.org/viewvc/llvm-project?rev=333603&view=rev Log: [X86] Use C style comments in intrinsic headers for overall consistency. Most of the original comments used C style /* */ comments, but some C++ // comments had snuck in over time. We still need to convert all the doxygen comments, which is much harder to do. Modified: cfe/trunk/lib/Headers/avx512erintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/f16cintrin.h cfe/trunk/lib/Headers/gfniintrin.h cfe/trunk/lib/Headers/movdirintrin.h cfe/trunk/lib/Headers/vpclmulqdqintrin.h Modified: cfe/trunk/lib/Headers/avx512erintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512erintrin.h?rev=333603&r1=333602&r2=333603&view=diff == --- cfe/trunk/lib/Headers/avx512erintrin.h (original) +++ cfe/trunk/lib/Headers/avx512erintrin.h Wed May 30 15:33:21 2018 @@ -27,7 +27,7 @@ #ifndef __AVX512ERINTRIN_H #define __AVX512ERINTRIN_H -// exp2a23 +/* exp2a23 */ #define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ @@ -76,7 +76,7 @@ #define _mm512_maskz_exp2a23_ps(M, A) \ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) -// rsqrt28 +/* rsqrt28 */ #define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ @@ -179,7 +179,7 @@ #define _mm_maskz_rsqrt28_sd(M, A, B) \ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -// rcp28 +/* rcp28 */ #define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ (__v8df)_mm512_setzero_pd(), \ @@ -282,4 +282,4 @@ #define _mm_maskz_rcp28_sd(M, A, B) \ _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) -#endif // __AVX512ERINTRIN_H +#endif /* __AVX512ERINTRIN_H */ Modified: 
cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333603&r1=333602&r2=333603&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 15:33:21 2018 @@ -9651,19 +9651,20 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFF),(__v8di)__A); } -// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as -// outputs. This class of vector operation forms the basis of many scientific -// computations. In vector-reduction arithmetic, the evaluation off is -// independent of the order of the input elements of V. - -// Used bisection method. At each step, we partition the vector with previous -// step in half, and the operation is performed on its two halves. -// This takes log2(n) steps where n is the number of elements in the vector. - -// Vec512 - Vector with size 512. -// Operator - Can be one of following: +,*,&,| -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for double. +/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as + * outputs. This class of vector operation forms the basis of many scientific + * computations. In vector-reduction arithmetic, the evaluation off is + * independent of the order of the input elements of V. + + * Used bisection method. At each step, we partition the vector with previous + * step in half, and the operation is performed on its two halves. + * This takes log2(n) steps where n is the number of elements in the vector. + + * Vec512 - Vector with size 512. + * Operator - Can be one of following: +,*,&,| + * T2 - Can get 'i' for int and 'f' for float. + * T1 - Can get 'i' for int and 'd' for double. 
+ */ #define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \ __extension__({ \ @@ -9717,14 +9718,15 @@ static __inline__ double __DEFAULT_FN_AT _mm512_reduce_operator_64bit(__W, *, f, d); } -// Vec512 - Vector with size 512. -// Vec512Neutral - All vector elements set to the identity element. -// Identity element: {+,0},{*,1},{&,0x},{|,0} -// Operator - Can be one of following: +,*,&,| -// Mask - Intrinsic Mask -// T2 - Can get 'i' for int and 'f' for float. -// T1 - Can get 'i' for int and 'd' for packed double-precision. -// T3 - Can be Pd for packed double or q for q-word. +/* Vec512 - Vector with size 512. + * Vec512Neutral - All vector elements set to the identity element. + * Identity element: {+,0},{*,1},{&,0xF
r333615 - [X86] Fix some places where macro arguments to intrinsics weren't cast to _m512(i|d)/_m256(i|d)/_m128(i|d) first.
Author: ctopper Date: Wed May 30 18:24:40 2018 New Revision: 333615 URL: http://llvm.org/viewvc/llvm-project?rev=333615&view=rev Log: [X86] Fix some places where macro arguments to intrinsics weren't cast to _m512(i|d)/_m256(i|d)/_m128(i|d) first. The majority of the cases were correct. This fixes the few that weren't. I also removed some superfluous parentheses in non-macros that confused my attempts at grepping for missing casts. Modified: cfe/trunk/lib/Headers/__wmmintrin_pclmul.h cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vbmi2intrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h Modified: cfe/trunk/lib/Headers/__wmmintrin_pclmul.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_pclmul.h?rev=333615&r1=333614&r2=333615&view=diff == --- cfe/trunk/lib/Headers/__wmmintrin_pclmul.h (original) +++ cfe/trunk/lib/Headers/__wmmintrin_pclmul.h Wed May 30 18:24:40 2018 @@ -55,8 +55,8 @@ ///Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used. /// \returns The 128-bit integer vector containing the result of the carry-less ///multiplication of the selected 64-bit values. 
-#define _mm_clmulepi64_si128(__X, __Y, __I) \ - ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \ -(__v2di)(__m128i)(__Y), (char)(__I))) +#define _mm_clmulepi64_si128(X, Y, I) \ + ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \ +(__v2di)(__m128i)(Y), (char)(I))) #endif /* __WMMINTRIN_PCLMUL_H */ Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=333615&r1=333614&r2=333615&view=diff == --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed May 30 18:24:40 2018 @@ -1119,7 +1119,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ - (__v8sf)(W)) + (__v8sf)(__m256)(W)) #define _mm512_maskz_extractf32x8_ps(U, A, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ @@ -1135,7 +1135,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ - (__v2df)(W)) + (__v2df)(__m128d)(W)) #define _mm512_maskz_extractf64x2_pd(U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ @@ -1157,7 +1157,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ -(__v8si)(W)) +(__v8si)(__m256i)(W)) #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ @@ -1173,7 +1173,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ -(__v2di)(W)) +(__v2di)(__m128i)(W)) #define _mm512_maskz_extracti64x2_epi64(U, A, 
imm) \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ @@ -1203,7 +1203,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_insertf32x8(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ - (__v16sf)(W)) + (__v16sf)(__m512)(W)) #define _mm512_maskz_insertf32x8(U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ @@ -1225,7 +1225,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __ #define _mm512_mask_insertf64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ - (__v8df)(W)) + (__v8df)(__m512d)(W)) #define _mm512_maskz_insertf64x2(U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -1255,7 +1
r333626 - [X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit equivalents.
Author: ctopper Date: Wed May 30 22:02:08 2018 New Revision: 333626 URL: http://llvm.org/viewvc/llvm-project?rev=333626&view=rev Log: [X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit equivalents. Previously we were just passing a -1 mask to the masked builtin. This changes it to the more generic approach that the 128/256-bit versions use. Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333626&r1=333625&r2=333626&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 22:02:08 2018 @@ -4590,10 +4590,10 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __ static __inline __m512i __DEFAULT_FN_ATTRS _mm512_loadu_si512 (void const *__P) { - return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + struct __loadu_si512 { +__m512i __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_si512*)__P)->__v; } static __inline __m512i __DEFAULT_FN_ATTRS @@ -4686,10 +4686,7 @@ _mm512_loadu_ps(void const *__p) static __inline __m512 __DEFAULT_FN_ATTRS _mm512_load_ps(void const *__p) { - return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1); + return *(__m512*)__p; } static __inline __m512 __DEFAULT_FN_ATTRS @@ -4712,10 +4709,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void static __inline __m512d __DEFAULT_FN_ATTRS _mm512_load_pd(void const *__p) { - return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) -1); + return *(__m512d*)__p; } static __inline __m512d __DEFAULT_FN_ATTRS @@ -4765,8 +4759,10 @@ _mm512_mask_storeu_epi64(void *__P, __mm static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_si512 (void *__P, __m512i __A) { 
- __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, -(__mmask16) -1); + struct __storeu_si512 { +__m512i __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_si512*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -4785,7 +4781,10 @@ _mm512_mask_storeu_pd(void *__P, __mmask static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_pd(void *__P, __m512d __A) { - __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); + struct __storeu_pd { +__m512d __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pd*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -4798,7 +4797,10 @@ _mm512_mask_storeu_ps(void *__P, __mmask static __inline void __DEFAULT_FN_ATTRS _mm512_storeu_ps(void *__P, __m512 __A) { - __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); + struct __storeu_ps { +__m512 __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_ps*)__P)->__v = __A; } static __inline void __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=333626&r1=333625&r2=333626&view=diff == --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed May 30 22:02:08 2018 @@ -159,7 +159,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m void test_mm512_storeu_si512 (void *__P, __m512i __A) { // CHECK-LABEL: @test_mm512_storeu_si512 - // CHECK: store <16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, align 1{{$}} + // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}} // CHECK-NEXT: ret void _mm512_storeu_si512 ( __P,__A); } @@ -253,7 +253,7 @@ void test_mm512_mask_storeu_epi64(void * __m512i test_mm512_loadu_si512 (void *__P) { // CHECK-LABEL: @test_mm512_loadu_si512 - // CHECK: load <16 x i32>, <16 x i32>* %{{.*}}, align 1{{$}} + // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}} return 
_mm512_loadu_si512 ( __P); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits