r339281 - [CodeGen][Timers] Enable llvm::TimePassesIsEnabled when -ftime-report is specified

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 12:14:23 2018
New Revision: 339281

URL: http://llvm.org/viewvc/llvm-project?rev=339281&view=rev
Log:
[CodeGen][Timers] Enable llvm::TimePassesIsEnabled when -ftime-report is 
specified

r330571 added a new FrontendTimesIsEnabled variable and replaced many usages of 
llvm::TimePassesIsEnabled, including the place that set 
llvm::TimePassesIsEnabled for -ftime-report. The effect of this is that 
-ftime-report now only contains the timers specifically referenced in 
CodeGenAction.cpp and none of the timers in the backend.

This commit adds back the assignment, but otherwise leaves everything else 
unchanged.

Modified:
cfe/trunk/lib/CodeGen/CodeGenAction.cpp

Modified: cfe/trunk/lib/CodeGen/CodeGenAction.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenAction.cpp?rev=339281&r1=339280&r2=339281&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenAction.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenAction.cpp Wed Aug  8 12:14:23 2018
@@ -127,6 +127,7 @@ namespace clang {
 CodeGenOpts, C, CoverageInfo)),
   LinkModules(std::move(LinkModules)) {
   FrontendTimesIsEnabled = TimePasses;
+  llvm::TimePassesIsEnabled = TimePasses;
 }
 llvm::Module *getModule() const { return Gen->GetModule(); }
 std::unique_ptr<llvm::Module> takeModule() {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339282 - [Builtins] Implement __builtin_clrsb to be compatible with gcc

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 12:55:52 2018
New Revision: 339282

URL: http://llvm.org/viewvc/llvm-project?rev=339282&view=rev
Log:
[Builtins] Implement __builtin_clrsb to be compatible with gcc

gcc defines an intrinsic called __builtin_clrsb which counts the number of 
extra sign bits on a number. This is equivalent to counting the number of 
leading zeros on a positive number or the number of leading ones on a negative 
number and subtracting one from the result. Since we can't count leading ones 
we need to invert negative numbers to count zeros.

This patch will cause the builtin to be expanded inline while gcc uses a call 
to a function like clrsbdi2 that is implemented in libgcc. But this is similar 
to what we already do for popcnt. And I don't think compiler-rt supports 
clrsbdi2.

Differential Revision: https://reviews.llvm.org/D50168

Added:
cfe/trunk/test/CodeGen/builtin_clrsb.c   (with props)
Modified:
cfe/trunk/include/clang/Basic/Builtins.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=339282&r1=339281&r2=339282&view=diff
==
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Wed Aug  8 12:55:52 2018
@@ -413,6 +413,9 @@ BUILTIN(__builtin_parityll, "iULLi", "nc
 BUILTIN(__builtin_popcount  , "iUi"  , "nc")
 BUILTIN(__builtin_popcountl , "iULi" , "nc")
 BUILTIN(__builtin_popcountll, "iULLi", "nc")
+BUILTIN(__builtin_clrsb  , "ii"  , "nc")
+BUILTIN(__builtin_clrsbl , "iLi" , "nc")
+BUILTIN(__builtin_clrsbll, "iLLi", "nc")
 
 // FIXME: These type signatures are not correct for targets with int != 32-bits
 // or with ULL != 64-bits.

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=339282&r1=339281&r2=339282&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Aug  8 12:55:52 2018
@@ -1537,6 +1537,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 return RValue::get(ComplexVal.second);
   }
 
+  case Builtin::BI__builtin_clrsb:
+  case Builtin::BI__builtin_clrsbl:
+  case Builtin::BI__builtin_clrsbll: {
+// clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
+Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+llvm::Type *ArgType = ArgValue->getType();
+Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+llvm::Type *ResultType = ConvertType(E->getType());
+Value *Zero = llvm::Constant::getNullValue(ArgType);
+Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
+Value *Inverse = Builder.CreateNot(ArgValue, "not");
+Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
+Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
+Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 
1));
+Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+   "cast");
+return RValue::get(Result);
+  }
   case Builtin::BI__builtin_ctzs:
   case Builtin::BI__builtin_ctz:
   case Builtin::BI__builtin_ctzl:

Added: cfe/trunk/test/CodeGen/builtin_clrsb.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin_clrsb.c?rev=339282&view=auto
==
--- cfe/trunk/test/CodeGen/builtin_clrsb.c (added)
+++ cfe/trunk/test/CodeGen/builtin_clrsb.c Wed Aug  8 12:55:52 2018
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+
+int test__builtin_clrsb(int x) {
+// CHECK-LABEL: test__builtin_clrsb
+// CHECK: [[C:%.*]] = icmp slt i32 [[X:%.*]], 0
+// CHECK-NEXT: [[INV:%.*]] = xor i32 [[X]], -1
+// CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[INV]], i32 [[X]]
+// CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SEL]], i1 false)
+// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[CTLZ]], 1
+  return __builtin_clrsb(x);
+}
+
+int test__builtin_clrsbll(long long x) {
+// CHECK-LABEL: test__builtin_clrsbll
+// CHECK: [[C:%.*]] = icmp slt i64 [[X:%.*]], 0
+// CHECK-NEXT: [[INV:%.*]] = xor i64 [[X]], -1
+// CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i64 [[INV]], i64 [[X]]
+// CHECK-NEXT: [[CTLZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[SEL]], i1 false)
+// CHECK-NEXT: [[SUB:%.*]] = sub i64 [[CTLZ]], 1
+// CHECK-NEXT: trunc i64 [[SUB]] to i32
+  return __builtin_clrsbll(x);
+}

Propchange: cfe/trunk/test/CodeGen/builtin_clrsb.c
--
svn:eol-style = native

Propchange: cfe/trunk/test/CodeGen/builtin_clrsb.c
--
svn:keywords = "Author Date Id Rev URL"

Propchange: cfe/trunk/t

r339287 - [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 13:59:40 2018
New Revision: 339287

URL: http://llvm.org/viewvc/llvm-project?rev=339287&view=rev
Log:
[Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr

This addresses a FIXME that has existed since before clang supported the 
builtin.

Differential Revision: https://reviews.llvm.org/D50471

Modified:
cfe/trunk/lib/AST/ExprConstant.cpp
cfe/trunk/test/Sema/constant-builtins-2.c

Modified: cfe/trunk/lib/AST/ExprConstant.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339287&r1=339286&r2=339287&view=diff
==
--- cfe/trunk/lib/AST/ExprConstant.cpp (original)
+++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug  8 13:59:40 2018
@@ -8117,9 +8117,15 @@ bool IntExprEvaluator::VisitBuiltinCallE
   case Builtin::BI__builtin_classify_type:
 return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E);
 
-  // FIXME: BI__builtin_clrsb
-  // FIXME: BI__builtin_clrsbl
-  // FIXME: BI__builtin_clrsbll
+  case Builtin::BI__builtin_clrsb:
+  case Builtin::BI__builtin_clrsbl:
+  case Builtin::BI__builtin_clrsbll: {
+APSInt Val;
+if (!EvaluateInteger(E->getArg(0), Val, Info))
+  return false;
+
+return Success(Val.getBitWidth() - Val.getMinSignedBits(), E);
+  }
 
   case Builtin::BI__builtin_clz:
   case Builtin::BI__builtin_clzl:

Modified: cfe/trunk/test/Sema/constant-builtins-2.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339287&r1=339286&r2=339287&view=diff
==
--- cfe/trunk/test/Sema/constant-builtins-2.c (original)
+++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug  8 13:59:40 2018
@@ -132,7 +132,7 @@ char isnormal_snan   [!__builtin_isnorma
 char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1];
 char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1];
 char clz3[__builtin_clz(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1];
-int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}}
+//int clz4 = __builtin_clz(0); // expected-error {{not a compile-time 
constant}}
 char clz5[__builtin_clzl(0xFL) == BITSIZE(long) - 4 ? 1 : -1];
 char clz6[__builtin_clzll(0xFFLL) == BITSIZE(long long) - 8 ? 1 : -1];
 char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1];
@@ -142,7 +142,7 @@ char clz9[__builtin_clzs(0xfff) == BITSI
 char ctz1[__builtin_ctz(1) == 0 ? 1 : -1];
 char ctz2[__builtin_ctz(8) == 3 ? 1 : -1];
 char ctz3[__builtin_ctz(1 << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1];
-int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}}
+//int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time 
constant}}
 char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1];
 char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1];
 char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 
: -1];
@@ -176,6 +176,19 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ?
 char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1];
 char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1];
 char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1];
+
+char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 1 : -1];
+char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1];
+char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1];
+char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1];
+char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1];
+char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1];
+char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1];
+char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1];
+char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1];
+char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1];
+char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1];
+char clrsb11[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1];
 #undef BITSIZE
 
 // GCC misc stuff


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339289 - Revert r339287 "[Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr"

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 14:21:21 2018
New Revision: 339289

URL: http://llvm.org/viewvc/llvm-project?rev=339289&view=rev
Log:
Revert r339287 "[Builtins] Add __builtin_clrsb support to 
IntExprEvaluator::VisitBuiltinCallExpr"

This commit included an additional unintended change.

Modified:
cfe/trunk/lib/AST/ExprConstant.cpp
cfe/trunk/test/Sema/constant-builtins-2.c

Modified: cfe/trunk/lib/AST/ExprConstant.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339289&r1=339288&r2=339289&view=diff
==
--- cfe/trunk/lib/AST/ExprConstant.cpp (original)
+++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug  8 14:21:21 2018
@@ -8117,15 +8117,9 @@ bool IntExprEvaluator::VisitBuiltinCallE
   case Builtin::BI__builtin_classify_type:
 return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E);
 
-  case Builtin::BI__builtin_clrsb:
-  case Builtin::BI__builtin_clrsbl:
-  case Builtin::BI__builtin_clrsbll: {
-APSInt Val;
-if (!EvaluateInteger(E->getArg(0), Val, Info))
-  return false;
-
-return Success(Val.getBitWidth() - Val.getMinSignedBits(), E);
-  }
+  // FIXME: BI__builtin_clrsb
+  // FIXME: BI__builtin_clrsbl
+  // FIXME: BI__builtin_clrsbll
 
   case Builtin::BI__builtin_clz:
   case Builtin::BI__builtin_clzl:

Modified: cfe/trunk/test/Sema/constant-builtins-2.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339289&r1=339288&r2=339289&view=diff
==
--- cfe/trunk/test/Sema/constant-builtins-2.c (original)
+++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug  8 14:21:21 2018
@@ -132,7 +132,7 @@ char isnormal_snan   [!__builtin_isnorma
 char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1];
 char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1];
 char clz3[__builtin_clz(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1];
-//int clz4 = __builtin_clz(0); // expected-error {{not a compile-time 
constant}}
+int clz4 = __builtin_clz(0); // expected-error {{not a compile-time constant}}
 char clz5[__builtin_clzl(0xFL) == BITSIZE(long) - 4 ? 1 : -1];
 char clz6[__builtin_clzll(0xFFLL) == BITSIZE(long long) - 8 ? 1 : -1];
 char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1];
@@ -142,7 +142,7 @@ char clz9[__builtin_clzs(0xfff) == BITSI
 char ctz1[__builtin_ctz(1) == 0 ? 1 : -1];
 char ctz2[__builtin_ctz(8) == 3 ? 1 : -1];
 char ctz3[__builtin_ctz(1 << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1];
-//int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time 
constant}}
+int ctz4 = __builtin_ctz(0); // expected-error {{not a compile-time constant}}
 char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1];
 char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1];
 char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 
: -1];
@@ -176,19 +176,6 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ?
 char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1];
 char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1];
 char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1];
-
-char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 1 : -1];
-char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1];
-char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1];
-char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1];
-char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1];
-char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1];
-char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1];
-char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1];
-char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1];
-char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1];
-char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1];
-char clrsb11[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1];
 #undef BITSIZE
 
 // GCC misc stuff


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339296 - [VFS] Remove superfluous semicolon from unittest.

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 15:31:14 2018
New Revision: 339296

URL: http://llvm.org/viewvc/llvm-project?rev=339296&view=rev
Log:
[VFS] Remove superfluous semicolon from unittest.

Modified:
cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp

Modified: cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp?rev=339296&r1=339295&r2=339296&view=diff
==
--- cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp (original)
+++ cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp Wed Aug  8 15:31:14 2018
@@ -158,7 +158,7 @@ std::string getPosixPath(std::string S)
   SmallString<128> Result;
   llvm::sys::path::native(S, Result, llvm::sys::path::Style::posix);
   return Result.str();
-};
+}
 } // end anonymous namespace
 
 TEST(VirtualFileSystemTest, StatusQueries) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339295 - [Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr

2018-08-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug  8 15:31:12 2018
New Revision: 339295

URL: http://llvm.org/viewvc/llvm-project?rev=339295&view=rev
Log:
[Builtins] Add __builtin_clrsb support to IntExprEvaluator::VisitBuiltinCallExpr

This addresses a FIXME that has existed since before clang supported the 
builtin.

This time with only reviewed changes.

Differential Revision: https://reviews.llvm.org/D50471

Modified:
cfe/trunk/lib/AST/ExprConstant.cpp
cfe/trunk/test/Sema/constant-builtins-2.c

Modified: cfe/trunk/lib/AST/ExprConstant.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ExprConstant.cpp?rev=339295&r1=339294&r2=339295&view=diff
==
--- cfe/trunk/lib/AST/ExprConstant.cpp (original)
+++ cfe/trunk/lib/AST/ExprConstant.cpp Wed Aug  8 15:31:12 2018
@@ -8117,9 +8117,15 @@ bool IntExprEvaluator::VisitBuiltinCallE
   case Builtin::BI__builtin_classify_type:
 return Success((int)EvaluateBuiltinClassifyType(E, Info.getLangOpts()), E);
 
-  // FIXME: BI__builtin_clrsb
-  // FIXME: BI__builtin_clrsbl
-  // FIXME: BI__builtin_clrsbll
+  case Builtin::BI__builtin_clrsb:
+  case Builtin::BI__builtin_clrsbl:
+  case Builtin::BI__builtin_clrsbll: {
+APSInt Val;
+if (!EvaluateInteger(E->getArg(0), Val, Info))
+  return false;
+
+return Success(Val.getBitWidth() - Val.getMinSignedBits(), E);
+  }
 
   case Builtin::BI__builtin_clz:
   case Builtin::BI__builtin_clzl:

Modified: cfe/trunk/test/Sema/constant-builtins-2.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/constant-builtins-2.c?rev=339295&r1=339294&r2=339295&view=diff
==
--- cfe/trunk/test/Sema/constant-builtins-2.c (original)
+++ cfe/trunk/test/Sema/constant-builtins-2.c Wed Aug  8 15:31:12 2018
@@ -176,6 +176,19 @@ char ffs4[__builtin_ffs(0xfbe70) == 5 ?
 char ffs5[__builtin_ffs(1U << (BITSIZE(int) - 1)) == BITSIZE(int) ? 1 : -1];
 char ffs6[__builtin_ffsl(0x10L) == 5 ? 1 : -1];
 char ffs7[__builtin_ffsll(0x100LL) == 9 ? 1 : -1];
+
+char clrsb1[__builtin_clrsb(0) == BITSIZE(int) - 1 ? 1 : -1];
+char clrsb2[__builtin_clrsbl(0L) == BITSIZE(long) - 1 ? 1 : -1];
+char clrsb3[__builtin_clrsbll(0LL) == BITSIZE(long long) - 1 ? 1 : -1];
+char clrsb4[__builtin_clrsb(~0) == BITSIZE(int) - 1 ? 1 : -1];
+char clrsb5[__builtin_clrsbl(~0L) == BITSIZE(long) - 1 ? 1 : -1];
+char clrsb6[__builtin_clrsbll(~0LL) == BITSIZE(long long) - 1 ? 1 : -1];
+char clrsb7[__builtin_clrsb(1) == BITSIZE(int) - 2 ? 1 : -1];
+char clrsb8[__builtin_clrsb(~1) == BITSIZE(int) - 2 ? 1 : -1];
+char clrsb9[__builtin_clrsb(1 << (BITSIZE(int) - 1)) == 0 ? 1 : -1];
+char clrsb10[__builtin_clrsb(~(1 << (BITSIZE(int) - 1))) == 0 ? 1 : -1];
+char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1];
+char clrsb12[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 1 : -1];
 #undef BITSIZE
 
 // GCC misc stuff


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339721 - [InlineAsm] Update the min-legal-vector-width function attribute based on inputs and outputs to inline assembly

2018-08-14 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue Aug 14 13:21:05 2018
New Revision: 339721

URL: http://llvm.org/viewvc/llvm-project?rev=339721&view=rev
Log:
[InlineAsm] Update the min-legal-vector-width function attribute based on 
inputs and outputs to inline assembly

Summary:
Another piece of my ongoing work for prefer-vector-width.

min-legal-vector-width will eventually be used by the X86 backend to know 
whether it needs to make 512-bit types legal when prefer-vector-width=256. If 
the user used inline assembly that passed in/out a 512-bit register, we need to 
make sure 512 bits are considered legal. Otherwise we'll get an assert failure 
when we try to wire up the inline assembly to the rest of the code.

This patch just checks the LLVM IR types to see if they are vectors and then 
updates the attribute based on their total width. I'm not sure if this is the 
best way to do this or if there's any subtlety I might have missed. So if 
anyone has other opinions on how to do this I'm open to suggestions.

Reviewers: chandlerc, rsmith, rnk

Reviewed By: rnk

Subscribers: eraman, cfe-commits

Differential Revision: https://reviews.llvm.org/D50678

Added:
cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c
Modified:
cfe/trunk/lib/CodeGen/CGStmt.cpp

Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=339721&r1=339720&r2=339721&view=diff
==
--- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp Tue Aug 14 13:21:05 2018
@@ -1979,6 +1979,11 @@ void CodeGenFunction::EmitAsmStmt(const
   diag::err_asm_invalid_type_in_input)
 << OutExpr->getType() << OutputConstraint;
   }
+
+  // Update largest vector width for any vector types.
+  if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
+LargestVectorWidth = std::max(LargestVectorWidth,
+  VT->getPrimitiveSizeInBits());
 } else {
   ArgTypes.push_back(Dest.getAddress().getType());
   Args.push_back(Dest.getPointer());
@@ -2000,6 +2005,10 @@ void CodeGenFunction::EmitAsmStmt(const
Arg->getType()))
 Arg = Builder.CreateBitCast(Arg, AdjTy);
 
+  // Update largest vector width for any vector types.
+  if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+LargestVectorWidth = std::max(LargestVectorWidth,
+  VT->getPrimitiveSizeInBits());
   if (Info.allowsRegister())
 InOutConstraints += llvm::utostr(i);
   else
@@ -2080,6 +2089,11 @@ void CodeGenFunction::EmitAsmStmt(const
   CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
   << InputExpr->getType() << InputConstraint;
 
+// Update largest vector width for any vector types.
+if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+  LargestVectorWidth = std::max(LargestVectorWidth,
+VT->getPrimitiveSizeInBits());
+
 ArgTypes.push_back(Arg->getType());
 Args.push_back(Arg);
 Constraints += InputConstraint;

Added: cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c?rev=339721&view=auto
==
--- cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c (added)
+++ cfe/trunk/test/CodeGen/x86-inline-asm-min-vector-width.c Tue Aug 14 
13:21:05 2018
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm 
-target-feature +avx512f -o - | FileCheck %s
+
+typedef long long __m128i __attribute__ ((vector_size (16)));
+typedef long long __m256i __attribute__ ((vector_size (32)));
+typedef long long __m512i __attribute__ ((vector_size (64)));
+
+// CHECK: define <2 x i64> @testXMMout(<2 x i64>* %p) #0
+__m128i testXMMout(__m128i *p) {
+  __m128i xmm0;
+  __asm__("vmovdqu %1, %0" :"=v"(xmm0) : "m"(*(__m128i*)p));
+  return xmm0;
+}
+
+// CHECK: define <4 x i64> @testYMMout(<4 x i64>* %p) #1
+__m256i testYMMout(__m256i *p) {
+  __m256i ymm0;
+  __asm__("vmovdqu %1, %0" :"=v"(ymm0) : "m"(*(__m256i*)p));
+  return ymm0;
+}
+
+// CHECK: define <8 x i64> @testZMMout(<8 x i64>* %p) #2
+__m512i testZMMout(__m512i *p) {
+  __m512i zmm0;
+  __asm__("vmovdqu64 %1, %0" :"=v"(zmm0) : "m"(*(__m512i*)p));
+  return zmm0;
+}
+
+// CHECK: define void @testXMMin(<2 x i64> %xmm0, <2 x i64>* %p) #0
+void testXMMin(__m128i xmm0, __m128i *p) {
+  __asm__("vmovdqu %0, %1" : : "v"(xmm0), "m"(*(__m128i*)p));
+}
+
+// CHECK: define void @testYMMin(<4 x i64> %ymm0, <4 x i64>* %p) #1
+void testYMMin(__m256i ymm0, __m256i *p) {
+  __asm__("vmovdqu %0, %1" : : "v"(ymm0), "m"(*(__m256i*)p));
+}
+
+// CHECK: define void @testZMMin(<8 x i64> %zmm0, <8 x i64>* %p) #2
+void testZMMin(__m512i zmm0, __m51

r339843 - [X86] Remove masking from the 512-bit padds and psubs builtins. Use select builtin instead.

2018-08-15 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Aug 15 23:20:29 2018
New Revision: 339843

URL: http://llvm.org/viewvc/llvm-project?rev=339843&view=rev
Log:
[X86] Remove masking from the 512-bit padds and psubs builtins. Use select 
builtin instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=339843&r1=339842&r2=339843&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 15 23:20:29 2018
@@ -1038,8 +1038,8 @@ TARGET_BUILTIN(__builtin_ia32_packssdw51
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
@@ -1051,8 +1051,8 @@ TARGET_BUILTIN(__builtin_ia32_pminsw512,
 TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
 

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=339843&r1=339842&r2=339843&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Wed Aug 15 23:20:29 2018
@@ -422,57 +422,45 @@ _mm512_maskz_packus_epi16(__mmask64 __M,
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_adds_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-  (__v64qi) __B,
-  (__v64qi) _mm512_setzero_si512(),
-  (__mmask64) -1);
+  return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
-   __m512i __B)
+_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-  (__v64qi) __B,
-  (__v64qi) __W,
-  (__mmask64) __U);
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+(__v64qi)_mm512_adds_epi8(__A, __B),
+(__v64qi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
-  (__v64qi) __B,
-  (__v64qi) _mm512_setzero_si512(),
-  (__mmask64) __U);
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+(__v64qi)_mm512_adds_epi8(__A, __B),
+(__v64qi)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_adds_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
-  (__v32hi) __B,
-  (__v32hi) _mm512_setzero_si512(),
-  (__mmask32) -1);
+  return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-  __m512i __B)
+_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U

r339845 - [X86] Remove masking from the 512-bit paddus/psubus builtins. Use a select builtin instead.

2018-08-16 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Aug 16 00:28:06 2018
New Revision: 339845

URL: http://llvm.org/viewvc/llvm-project?rev=339845&view=rev
Log:
[X86] Remove masking from the 512-bit paddus/psubus builtins. Use a select 
builtin instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=339845&r1=339844&r2=339845&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Aug 16 00:28:06 2018
@@ -1040,8 +1040,8 @@ TARGET_BUILTIN(__builtin_ia32_packusdw51
 TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
@@ -1053,8 +1053,8 @@ TARGET_BUILTIN(__builtin_ia32_pminuw512,
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", 
"ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", 
"ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusb512, "V64cV64cV64c", "ncV:512:", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusw512, "V32sV32sV32s", "ncV:512:", 
"avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", 
"ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", 
"ncV:256:", "avx512cd,avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=339845&r1=339844&r2=339845&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Aug 16 00:28:06 2018
@@ -8931,12 +8931,6 @@ static Value *EmitX86AddSubSatExpr(CodeG
 Res = CGF.Builder.CreateSub(Select, Ops[1]);
   }
 
-  if (E->getNumArgs() == 4) { // For masked intrinsics.
-Value *VecSRC = Ops[2];
-Value *Mask = Ops[3];
-return EmitX86Select(CGF, Mask, Res, VecSRC);
-  }
-
   return Res;
 }
 
@@ -10563,15 +10557,15 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 Load->setVolatile(true);
 return Load;
   }
-  case X86::BI__builtin_ia32_paddusb512_mask:
-  case X86::BI__builtin_ia32_paddusw512_mask:
+  case X86::BI__builtin_ia32_paddusb512:
+  case X86::BI__builtin_ia32_paddusw512:
   case X86::BI__builtin_ia32_paddusb256:
   case X86::BI__builtin_ia32_paddusw256:
   case X86::BI__builtin_ia32_paddusb128:
   case X86::BI__builtin_ia32_paddusw128:
 return EmitX86AddSubSatExpr(*this, E, Ops, true /* IsAddition */);
-  case X86::BI__builtin_ia32_psubusb512_mask:
-  case X86::BI__builtin_ia32_psubusw512_mask:
+  case X86::BI__builtin_ia32_psubusb512:
+  case X86::BI__builtin_ia32_psubusw512:
   case X86::BI__builtin_ia32_psubusb256:
   case X86::BI__builtin_ia32_psubusw256:
   case X86::BI__builtin_ia32_psubusb128:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=339845&r1=339844&r2=339845&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Thu Aug 16 00:28:06 2018
@@ -466,57 +466,45 @@ _mm512_maskz_adds_epi16 (__mmask32 __U,
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_adds_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
-  (__v64qi) __B,
-  (__v64qi) _mm512_setzero_si512(),
-  (__mmask64) -1);
+  return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __

r331893 - [X86] Only enable the __ud2 and __int2c builtins if intrin.h has been included.

2018-05-09 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May  9 09:57:48 2018
New Revision: 331893

URL: http://llvm.org/viewvc/llvm-project?rev=331893&view=rev
Log:
[X86] Only enable the __ud2 and __int2c builtins if intrin.h has been included.

Differential Revision: https://reviews.llvm.org/D46332

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=331893&r1=331892&r2=331893&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed May  9 09:57:48 2018
@@ -1899,8 +1899,8 @@ TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi
 TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 
 TARGET_HEADER_BUILTIN(__stosb, "vUc*Ucz", "nh", "intrin.h", ALL_MS_LANGUAGES, 
"")
-TARGET_HEADER_BUILTIN(__int2c, "v",   "nr", "intrin.h", ALL_MS_LANGUAGES, 
"")
-TARGET_HEADER_BUILTIN(__ud2,   "v",   "nr", "intrin.h", ALL_MS_LANGUAGES, 
"")
+TARGET_HEADER_BUILTIN(__int2c, "v",   "nhr", "intrin.h", ALL_MS_LANGUAGES, 
"")
+TARGET_HEADER_BUILTIN(__ud2,   "v",   "nhr", "intrin.h", ALL_MS_LANGUAGES, 
"")
 
 TARGET_HEADER_BUILTIN(__readfsbyte,  "UcUNi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__readfsword,  "UsUNi", "nh", "intrin.h", 
ALL_MS_LANGUAGES, "")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r331943 - [Builtins] Improve the IR emitted for MSVC compatible rotr/rotl builtins to match what the middle and backends understand

2018-05-09 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May  9 17:05:13 2018
New Revision: 331943

URL: http://llvm.org/viewvc/llvm-project?rev=331943&view=rev
Log:
[Builtins] Improve the IR emitted for MSVC compatible rotr/rotl builtins to 
match what the middle and backends understand

Previously we emitted something like

rotl(x, n) {
  n &= bitwidth-1;
  return n != 0 ? ((x << n) | (x >> (bitwidth - n))) : x;
}

We use a select to avoid the undefined behavior on the (bitwidth - n) shift.

The middle and backend don't really recognize this as a rotate and end up 
emitting a cmov or control flow because of the select.

A better pattern is (x << (n & mask)) | (x >> (-n & mask)) where mask is 
bitwidth - 1.

Fixes the main complaint in PR37387. There's still some work to be done if the 
user writes that sequence directly on a short or char where type promotion 
rules can prevent it from being recognized. The builtin is emitting direct IR 
with unpromoted types so that isn't a problem for it.

Differential Revision: https://reviews.llvm.org/D46656

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331943&r1=331942&r2=331943&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May  9 17:05:13 2018
@@ -1409,20 +1409,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 
 llvm::Type *ArgType = Val->getType();
 Shift = Builder.CreateIntCast(Shift, ArgType, false);
-unsigned ArgWidth = cast(ArgType)->getBitWidth();
-Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
-Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+unsigned ArgWidth = ArgType->getIntegerBitWidth();
 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-Shift = Builder.CreateAnd(Shift, Mask);
-Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
-
-Value *RightShifted = Builder.CreateLShr(Val, Shift);
-Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
-Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
 
-Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
-Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
+Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
 return RValue::get(Result);
   }
   case Builtin::BI_rotl8:
@@ -1435,20 +1429,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 
 llvm::Type *ArgType = Val->getType();
 Shift = Builder.CreateIntCast(Shift, ArgType, false);
-unsigned ArgWidth = cast(ArgType)->getBitWidth();
-Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
-Value *ArgZero = llvm::Constant::getNullValue(ArgType);
-
+unsigned ArgWidth = ArgType->getIntegerBitWidth();
 Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-Shift = Builder.CreateAnd(Shift, Mask);
-Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
-
-Value *LeftShifted = Builder.CreateShl(Val, Shift);
-Value *RightShifted = Builder.CreateLShr(Val, RightShift);
-Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
 
-Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
-Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
+Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
+Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
+Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
+Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
+Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
 return RValue::get(Result);
   }
   case Builtin::BI__builtin_unpredictable: {

Modified: cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c?rev=331943&r1=331942&r2=331943&view=diff
==
--- cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c (original)
+++ cfe/trunk/test/CodeGen/ms-intrinsics-rotations.c Wed May  9 17:05:13 2018
@@ -30,13 +30,12 @@ unsigned char test_rotl8(unsigned char v
   return _rotl8(value, shift);
 }
 // CHECK: i8 @test_rotl8
-// CHECK:   [[SHIFT:%[0-9]+]] = and i8 %{{[0-9]+}}, 7
-// CHECK:   [[NEGSHIFT:%[0-9]+]] = sub i8 8, [[SHIFT]]
-// CHECK:   [[HIGH:%[0-9]+]] = shl i8 [[VALUE:%[0-9]+]], [[SHIFT]]
-// CHECK:   [[LOW:%[0-9]+]] = lshr i8 [[VALUE]], [[NEGSHIFT]]
-// CHECK:   [[ROTATED:%[0-9]+]] = or i8 [[HIGH]], [[LOW]]
-// CHECK:   [[ISZERO:%

r331958 - [X86] Change the implementation of scalar masked load/store intrinsics to not use a 512-bit intermediate vector.

2018-05-09 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May  9 22:43:43 2018
New Revision: 331958

URL: http://llvm.org/viewvc/llvm-project?rev=331958&view=rev
Log:
[X86] Change the implementation of scalar masked load/store intrinsics to not 
use a 512-bit intermediate vector.

This is unnecessary for AVX512VL supporting CPUs like SKX. We can just emit a 
128-bit masked load/store here no matter what. The backend will widen it to 
512-bits on KNL CPUs.

Fixes the frontend portion of PR37386. Need to fix the backend to optimize the 
new sequences well.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=331958&r1=331957&r2=331958&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed May  9 22:43:43 2018
@@ -1523,10 +1523,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmps
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", 
"avx512vl")
@@ -1543,10 +1543,10 @@ TARGET_BUILTIN(__builtin_ia32_storedquhi
 TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", 
"avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331958&r1=331957&r2=331958&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May  9 22:43:43 2018
@@ -8735,7 +8735,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask: {
-return EmitX86MaskedStore(*this, Ops, 16);
+return EmitX86MaskedStore(*this, Ops, 1);
   }
   case X86::BI__builtin_ia32_vpopcntb_128:
   case X86::BI__builtin_ia32_vpopcntd_128:
@@ -8819,7 +8819,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 
   case X86::BI__builtin_ia32_loadss128_mask:
   case X86::BI__builtin_ia32_loadsd128_mask:
-return EmitX86MaskedLoad(*this, Ops, 16);
+return EmitX86MaskedLoad(*this, Ops, 1);
 
   case X86::BI__builtin_ia32_loadaps128_mask:
   case X86::BI__builtin_ia32_loadaps256_mask:

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=331958&r1=331957&r2=331958&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May  9 22:43:43 2018
@@ -9091,17 +9091,13 @@ _mm_maskz_move_sd (__mmask8 __U, __m128d
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
 {
-  __builtin_ia32_storess128_mask ((__v16sf *)__W,
-(__v16sf) _mm512_castps128_ps512(__A),
-(__mmask16) __U & (__mmask16)1

r332108 - [X86] Fix the file header name on fmaintrin.h

2018-05-11 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri May 11 10:37:40 2018
New Revision: 332108

URL: http://llvm.org/viewvc/llvm-project?rev=332108&view=rev
Log:
[X86] Fix the file header name on fmaintrin.h

Modified:
cfe/trunk/lib/Headers/fmaintrin.h

Modified: cfe/trunk/lib/Headers/fmaintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fmaintrin.h?rev=332108&r1=332107&r2=332108&view=diff
==
--- cfe/trunk/lib/Headers/fmaintrin.h (original)
+++ cfe/trunk/lib/Headers/fmaintrin.h Fri May 11 10:37:40 2018
@@ -1,4 +1,4 @@
-/*=== fma4intrin.h - FMA4 intrinsics ---===
+/*=== fmaintrin.h - FMA intrinsics -===
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332203 - [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss.

2018-05-13 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 13 16:03:30 2018
New Revision: 332203

URL: http://llvm.org/viewvc/llvm-project?rev=332203&view=rev
Log:
[X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and 
_mm_cvtu64_ss.

We can use direct C code for these that will use uitofp and insertelement 
instructions.

For the versions that take an explicit rounding mode we can't do this.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332203&r1=332202&r2=332203&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 13 16:03:30 2018
@@ -1840,7 +1840,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtw2mask2
 TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2sd32, "V2dV2dUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", 
"nc", "avx512vbmi")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", 
"nc", "avx512vbmi,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332203&r1=332202&r2=332203&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 16:03:30 2018
@@ -9521,7 +9521,8 @@ _mm_maskz_cvtss_sd (__mmask8 __U, __m128
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtu32_sd (__m128d __A, unsigned __B)
 {
-  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+  __A[0] = __B;
+  return __A;
 }
 
 #ifdef __x86_64__
@@ -9532,8 +9533,8 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
 {
-  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
- _MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 #endif
 
@@ -9544,8 +9545,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned lon
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtu32_ss (__m128 __A, unsigned __B)
 {
-  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
-_MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 
 #ifdef __x86_64__
@@ -9556,8 +9557,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 {
-  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
-_MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 #endif
 

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332203&r1=332202&r2=332203&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 16:03:30 2018
@@ -7007,7 +7007,8 @@ __m128d test_mm_maskz_cvt_roundss_sd( __
 
 __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) {
   // CHECK-LABEL: @test_mm_cvtu32_sd
-  // CHECK: @llvm.x86.avx512.cvtusi2sd
+  // CHECK: uitofp i32 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtu32_sd(__A, __B); 
 }
 
@@ -7020,7 +7021,8 @@ __m128d test_mm_cvt_roundu64_sd(__m128d
 
 __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) {
   // CHECK-LABEL: @test_mm_cvtu64_sd
-  // CHECK: @llvm.x86.avx512.cvtusi642sd
+  // CHECK: uitofp i64 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtu64_sd(__A, __B); 
 }
 #endif
@@ -7033,7 +7035,8 @@ __m128 test_mm_cvt_roundu32_ss(__m128 __
 
 __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) {
   // CHECK-LABEL: @test_mm_cvtu32_ss
-  // CHECK: @llvm.x86.avx512.cvtusi2ss
+  // CHECK: uitofp i32 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtu32_ss(__A, __B); 
 }
 
@@ -7046,7 +7049,8 @@ __m128 test_mm_cvt_roundu64_ss(__m128 __
 
 __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) {
   // CHECK-LABEL: @test_mm_cvtu64_ss
-  // CHECK: @llvm.x86.avx512.cvtusi642ss
+  // CHECK: uitofp i64 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   retu

r332210 - [X86] Use __builtin_convertvector to implement _mm512_cvtps_pd.

2018-05-13 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 13 21:05:06 2018
New Revision: 332210

URL: http://llvm.org/viewvc/llvm-project?rev=332210&view=rev
Log:
[X86] Use __builtin_convertvector to implement _mm512_cvtps_pd.

If we're using the default rounding mode we can let __builtin_convertvector 
generate an fpextend. This matches 128 and 256 bit.

If we're using the version that takes an explicit rounding mode argument we 
would need to look at the immediate to see if it's CUR_DIRECTION.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332210&r1=332209&r2=332210&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 21:05:06 2018
@@ -9311,11 +9311,7 @@ _mm512_maskz_expand_epi32 (__mmask16 __U
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_cvtps_pd (__m256 __A)
 {
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-(__v8df)
-_mm512_undefined_pd (),
-(__mmask8) -1,
-_MM_FROUND_CUR_DIRECTION);
+  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332210&r1=332209&r2=332210&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 21:05:06 2018
@@ -6630,14 +6630,14 @@ __m512d test_mm512_maskz_cvt_roundps_pd(
 
 __m512d test_mm512_cvtps_pd(__m256 __A) {
   // CHECK-LABEL: @test_mm512_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512
+  // CHECK: fpext <8 x float> %{{.*}} to <8 x double>
   return _mm512_cvtps_pd(__A); 
 }
 
 __m512d test_mm512_cvtpslo_pd(__m512 __A) {
   // CHECK-LABEL: @test_mm512_cvtpslo_pd
   // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x 
i32> 
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512
+  // CHECK: fpext <8 x float> %{{.*}} to <8 x double>
   return _mm512_cvtpslo_pd(__A);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332213 - [X86] Use select instruction and fpextend in the implementation of _mm512_mask_cvtps_pd and _mm512_maskz_cvtps_pd.

2018-05-13 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 13 21:57:46 2018
New Revision: 332213

URL: http://llvm.org/viewvc/llvm-project?rev=332213&view=rev
Log:
[X86] Use select instruction and fpextend in the implementation of 
_mm512_mask_cvtps_pd and _mm512_maskz_cvtps_pd.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332213&r1=332212&r2=332213&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 21:57:46 2018
@@ -9317,20 +9317,17 @@ _mm512_cvtps_pd (__m256 __A)
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
 {
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-(__v8df) __W,
-(__mmask8) __U,
-_MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+  (__v8df)_mm512_cvtps_pd(__A),
+  (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
 {
-  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
-(__v8df)
-_mm512_setzero_pd (),
-(__mmask8) __U,
-_MM_FROUND_CUR_DIRECTION);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+  (__v8df)_mm512_cvtps_pd(__A),
+  (__v8df)_mm512_setzero_pd());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332213&r1=332212&r2=332213&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 21:57:46 2018
@@ -6643,20 +6643,23 @@ __m512d test_mm512_cvtpslo_pd(__m512 __A
 
 __m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm512_mask_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512
+  // CHECK: fpext <8 x float> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_cvtps_pd(__W, __U, __A); 
 }
 
 __m512d test_mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_mask_cvtpslo_pd
   // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <8 x 
i32> 
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512
+  // CHECK: fpext <8 x float> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_cvtpslo_pd(__W, __U, __A);
 }
 
 __m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvtps_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512
+  // CHECK: fpext <8 x float> %{{.*}} to <8 x double>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_cvtps_pd(__U, __A); 
 }
 __m512d test_mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332266 - [X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins.

2018-05-14 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 14 10:50:40 2018
New Revision: 332266

URL: http://llvm.org/viewvc/llvm-project?rev=332266&view=rev
Log:
[X86] Use __builtin_convertvector to replace some of the avx512 truncate 
builtins.

As long as the destination type is a 256 or 128 bit vector with the same number 
of elements we can use __builtin_convertvector to directly generate trunc IR 
instruction which will be handled natively by the backend.

Differential Revision: https://reviews.llvm.org/D46742

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlbwintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332266&r1=332265&r2=332266&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 14 10:50:40 2018
@@ -1355,7 +1355,6 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512
 
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", 
"avx512vl,avx512dq")
@@ -1397,7 +1396,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovswb256
 TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "nc", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "nc", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "nc", 
"avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb256_mask, "V16cV16sV16cUs", "nc", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", 
"avx512dq")
@@ -1719,16 +1717,12 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw12
 TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", 
"avx512vl,avx512bw")
@@ -1738,7 +1732,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovdb256m
 TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", 
"avx512vl")
@@ -1746,7 +1739,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovqb256_
 TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "n", 
"avx512

r332322 - [X86] Revert part of r332266: Use __builtin_convertvector to replace some of the avx512 truncate builtins.

2018-05-14 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 14 20:17:52 2018
New Revision: 332322

URL: http://llvm.org/viewvc/llvm-project?rev=332322&view=rev
Log:
[X86] Revert part of r332266: Use __builtin_convertvector to replace some of 
the avx512 truncate builtins.

The masking doesn't work right in the backend for the ones that produce byte or 
word elements without avx512bw.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332322&r1=332321&r2=332322&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 14 20:17:52 2018
@@ -1717,12 +1717,15 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw12
 TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", 
"avx512vl,avx512bw")
@@ -1732,6 +1735,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovdb256m
 TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", 
"avx512vl")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332322&r1=332321&r2=332322&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Mon May 14 20:17:52 2018
@@ -7607,17 +7607,16 @@ _mm512_cvtepi32_epi8 (__m512i __A)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
- 
(__v16qi)_mm512_cvtepi32_epi8(__A),
- (__v16qi)__O);
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+  (__v16qi) __O, __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
 {
-  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
- 
(__v16qi)_mm512_cvtepi32_epi8(__A),
- (__v16qi)_mm_setzero_si128());
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+  (__v16qi) _mm_setzero_si128 (),
+  __M);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7635,17 +7634,16 @@ _mm512_cvtepi32_epi16 (__m512i __A)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
- 
(__v16hi)_mm512_cvtepi32_epi16(__A),
- (__v16hi)__O);
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+  (__v16hi) __O, __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
 {
-  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__

r332738 - [X86] Fix a bad cast from mask16 to mask8 in _mm256_mask_cvtepi16_epi8 introduced in r332266.

2018-05-18 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri May 18 10:18:46 2018
New Revision: 332738

URL: http://llvm.org/viewvc/llvm-project?rev=332738&view=rev
Log:
[X86] Fix a bad cast from mask16 to mask8 in _mm256_mask_cvtepi16_epi8 
introduced in r332266.

Modified:
cfe/trunk/lib/Headers/avx512vlbwintrin.h

Modified: cfe/trunk/lib/Headers/avx512vlbwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbwintrin.h?rev=332738&r1=332737&r2=332738&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlbwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlbwintrin.h Fri May 18 10:18:46 2018
@@ -1556,14 +1556,14 @@ _mm256_cvtepi16_epi8 (__m256i __A) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
-  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
  
(__v16qi)_mm256_cvtepi16_epi8(__A),
  (__v16qi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
-  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
  
(__v16qi)_mm256_cvtepi16_epi8(__A),
  (__v16qi)_mm_setzero_si128());
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332825 - [X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in IR instead.

2018-05-20 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 20 16:34:10 2018
New Revision: 332825

URL: http://llvm.org/viewvc/llvm-project?rev=332825&view=rev
Log:
[X86] Remove mask arguments from permvar builtins/intrinsics. Use a select in 
IR instead.

Someday maybe we'll use selects for all the builtins.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vbmiintrin.h
cfe/trunk/lib/Headers/avx512vbmivlintrin.h
cfe/trunk/lib/Headers/avx512vlbwintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vbmi-builtins.c
cfe/trunk/test/CodeGen/avx512vbmivl-builtin.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332825&r1=332824&r2=332825&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 20 16:34:10 2018
@@ -1766,18 +1766,18 @@ TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_
 TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi", "nc", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsf512_mask, "V16fV16fV16iV16fUs", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsi512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarqi512_mask, "V64cV64cV64cV64cULLi", "nc", 
"avx512vbmi")
-TARGET_BUILTIN(__builtin_ia32_permvarqi128_mask, "V16cV16cV16cV16cUs", "nc", 
"avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarqi256_mask, "V32cV32cV32cV32cUi", "nc", 
"avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi128_mask, "V8sV8sV8sV8sUc", "nc", 
"avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs", "nc", 
"avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc", 
"nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "nc", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "nc", 
"avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "nc", 
"avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "nc", 
"avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "nc", 
"avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "nc", 
"avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "nc", 
"avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "nc", 
"avx512dq,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=332825&r1=332824&r2=332825&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun May 20 16:34:10 2018
@@ -2034,30 +2034,25 @@ _mm512_maskz_broadcastw_epi16 (__mmask32
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B,
- (__v32hi) __A,
- (__v32hi) _mm512_undefined_epi32 (),
- (__mmask32) -1);
+  return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
 __m512i __B)
 {
-  return (__m512i) __builtin_ia32_p

r332829 - [X86] Remove some unused builtins.

2018-05-20 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 20 20:36:57 2018
New Revision: 332829

URL: http://llvm.org/viewvc/llvm-project?rev=332829&view=rev
Log:
[X86] Remove some unused builtins.

These were upgraded to native shufflevectors months ago.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Sema/SemaChecking.cpp

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332829&r1=332828&r2=332829&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 20 20:36:57 2018
@@ -1616,14 +1616,6 @@ TARGET_BUILTIN(__builtin_ia32_pternlogq1
 TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", 
"nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", 
"nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", 
"nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_mask, "V16fV16fV16fIiV16fUs", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_mask, "V8dV8dV8dIiV8dUc", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_mask, "V16iV16iV16iIiV16iUs", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", 
"nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256_mask, "V8fV8fV8fIiV8fUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256_mask, "V4dV4dV4dIiV4dUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256_mask, "V8iV8iV8iIiV8iUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", 
"nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "nc", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "nc", 
"avx512vl")

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=332829&r1=332828&r2=332829&view=diff
==
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Sun May 20 20:36:57 2018
@@ -2574,10 +2574,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u
 i = 1; l = 0; u = 7;
 break;
   case X86::BI__builtin_ia32_sha1rnds4:
-  case X86::BI__builtin_ia32_shuf_f32x4_256_mask:
-  case X86::BI__builtin_ia32_shuf_f64x2_256_mask:
-  case X86::BI__builtin_ia32_shuf_i32x4_256_mask:
-  case X86::BI__builtin_ia32_shuf_i64x2_256_mask:
 i = 2; l = 0; u = 3;
 break;
   case X86::BI__builtin_ia32_vpermil2pd:
@@ -2696,10 +2692,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   case X86::BI__builtin_ia32_palignr512_mask:
   case X86::BI__builtin_ia32_vcomisd:
   case X86::BI__builtin_ia32_vcomiss:
-  case X86::BI__builtin_ia32_shuf_f32x4_mask:
-  case X86::BI__builtin_ia32_shuf_f64x2_mask:
-  case X86::BI__builtin_ia32_shuf_i32x4_mask:
-  case X86::BI__builtin_ia32_shuf_i64x2_mask:
   case X86::BI__builtin_ia32_dbpsadbw128_mask:
   case X86::BI__builtin_ia32_dbpsadbw256_mask:
   case X86::BI__builtin_ia32_dbpsadbw512_mask:


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332830 - [X86] Remove some preprocessor feature checks from intrinsic headers

2018-05-20 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun May 20 23:07:49 2018
New Revision: 332830

URL: http://llvm.org/viewvc/llvm-project?rev=332830&view=rev
Log:
[X86] Remove some preprocessor feature checks from intrinsic headers

Summary:
These look to be a couple things that weren't removed when we switched to 
target attribute.

The popcnt makes including just smmintrin.h also include popcntintrin.h. The 
popcnt file itself already contains target attributes.

The prefetch ones are just wrappers around __builtin_prefetch which we have 
graceful fallbacks for in the backend if the exact instruction isn't available. 
So there's no reason to hide them. And it makes them available in functions 
that have the right target attribute but not a -march command line flag.

Reviewers: echristo, RKSimon, spatel, DavidKreitzer

Reviewed By: echristo

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D47029

Modified:
cfe/trunk/lib/Headers/prfchwintrin.h
cfe/trunk/lib/Headers/smmintrin.h

Modified: cfe/trunk/lib/Headers/prfchwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/prfchwintrin.h?rev=332830&r1=332829&r2=332830&view=diff
==
--- cfe/trunk/lib/Headers/prfchwintrin.h (original)
+++ cfe/trunk/lib/Headers/prfchwintrin.h Sun May 20 23:07:49 2018
@@ -28,7 +28,6 @@
 #ifndef __PRFCHWINTRIN_H
 #define __PRFCHWINTRIN_H
 
-#if defined(__PRFCHW__) || defined(__3dNOW__)
 /// Loads a memory sequence containing the specified memory address into
 ///all data cache levels. The cache-coherency state is set to exclusive.
 ///Data can be read from and written to the cache line without additional
@@ -66,6 +65,5 @@ _m_prefetchw(void *__P)
 {
   __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
 }
-#endif
 
 #endif /* __PRFCHWINTRIN_H */

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=332830&r1=332829&r2=332830&view=diff
==
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Sun May 20 23:07:49 2018
@@ -2458,8 +2458,6 @@ _mm_crc32_u64(unsigned long long __C, un
 
 #undef __DEFAULT_FN_ATTRS
 
-#ifdef __POPCNT__
 #include <popcntintrin.h>
-#endif
 
 #endif /* __SMMINTRIN_H */


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332882 - [X86] Use __builtin_convertvector to implement some of the packed integer to packed float conversion intrinsics.

2018-05-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 21 13:19:17 2018
New Revision: 332882

URL: http://llvm.org/viewvc/llvm-project?rev=332882&view=rev
Log:
[X86] Use __builtin_convertvector to implement some of the packed integer to 
packed float conversion intrinsics.

I believe this is safe assuming the default FP environment. The conversion 
might be inexact, but it can never overflow the FP type so this shouldn't be 
undefined behavior for the uitofp/sitofp instructions.

We already do something similar for scalar conversions.

Differential Revision: https://reviews.llvm.org/D46863

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
cfe/trunk/test/CodeGen/avx512vldq-builtins.c
cfe/trunk/test/CodeGen/builtins-x86.c
cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332882&r1=332881&r2=332882&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 21 13:19:17 2018
@@ -320,7 +320,6 @@ TARGET_BUILTIN(__builtin_ia32_movnti, "v
 TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2")
@@ -1200,8 +1199,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2udq
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2ps256_mask, "V8fV8iV8fUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "nc", 
"avx512vl")
@@ -1363,8 +1360,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtps2qq12
 TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", 
"avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd128_mask, "V2dV2LLiV2dUc", "nc", 
"avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd256_mask, "V4dV4LLiV4dUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", 
"avx512vl,avx512dq")
@@ -1375,8 +1370,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2qq1
 TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", 
"avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd128_mask, "V2dV2LLiV2dUc", "nc", 
"avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd256_mask, "V4dV4LLiV4dUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "nc", 
"avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "nc", 
"avx512vl,avx512dq")

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=332882&r1=332881&r2=332882&view=diff
==
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Mon May 21 13:19:17 2018
@@ -361,26 +361,21 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U,
 
 static __inline__ __m512d __DEFAULT_

r332891 - [X86] Remove masking from pternlog llvm intrinsics and use a select instruction instead.

2018-05-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 21 13:58:23 2018
New Revision: 332891

URL: http://llvm.org/viewvc/llvm-project?rev=332891&view=rev
Log:
[X86] Remove masking from pternlog llvm intrinsics and use a select instruction 
instead.

Because the intrinsics in the headers are implemented as macros, we can't just 
use a select builtin and pternlog builtin. This would require one of the macro 
arguments to be used twice. Depending on what was passed to the macro we could 
expand an expression twice leading to weird behavior. We could maybe declare 
our local variable in the macro, but that would need to worry about name 
collisions.

To avoid that just generate IR directly in CGBuiltin.cpp.

Differential Revision: https://reviews.llvm.org/D47125

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=332891&r1=332890&r2=332891&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon May 21 13:58:23 2018
@@ -8445,6 +8445,37 @@ static Value *EmitX86Muldq(CodeGenFuncti
   return CGF.Builder.CreateMul(LHS, RHS);
 }
 
+// Emit a masked pternlog intrinsic. This only exists because the header has to
+// use a macro and we aren't able to pass the input argument to a pternlog
+// builtin and a select builtin without evaluating it twice.
+static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
+ ArrayRef<Value *> Ops) {
+  llvm::Type *Ty = Ops[0]->getType();
+
+  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+  unsigned EltWidth = Ty->getScalarSizeInBits();
+  Intrinsic::ID IID;
+  if (VecWidth == 128 && EltWidth == 32)
+IID = Intrinsic::x86_avx512_pternlog_d_128;
+  else if (VecWidth == 256 && EltWidth == 32)
+IID = Intrinsic::x86_avx512_pternlog_d_256;
+  else if (VecWidth == 512 && EltWidth == 32)
+IID = Intrinsic::x86_avx512_pternlog_d_512;
+  else if (VecWidth == 128 && EltWidth == 64)
+IID = Intrinsic::x86_avx512_pternlog_q_128;
+  else if (VecWidth == 256 && EltWidth == 64)
+IID = Intrinsic::x86_avx512_pternlog_q_256;
+  else if (VecWidth == 512 && EltWidth == 64)
+IID = Intrinsic::x86_avx512_pternlog_q_512;
+  else
+llvm_unreachable("Unexpected intrinsic");
+
+  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+  Ops.drop_back());
+  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
+  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
+}
+
 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, 
   llvm::Type *DstTy) {
   unsigned NumberOfElements = DstTy->getVectorNumElements();
@@ -9159,6 +9190,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_pmuldq512:
 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
 
+  case X86::BI__builtin_ia32_pternlogd512_mask:
+  case X86::BI__builtin_ia32_pternlogq512_mask:
+  case X86::BI__builtin_ia32_pternlogd128_mask:
+  case X86::BI__builtin_ia32_pternlogd256_mask:
+  case X86::BI__builtin_ia32_pternlogq128_mask:
+  case X86::BI__builtin_ia32_pternlogq256_mask:
+return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
+
+  case X86::BI__builtin_ia32_pternlogd512_maskz:
+  case X86::BI__builtin_ia32_pternlogq512_maskz:
+  case X86::BI__builtin_ia32_pternlogd128_maskz:
+  case X86::BI__builtin_ia32_pternlogd256_maskz:
+  case X86::BI__builtin_ia32_pternlogq128_maskz:
+  case X86::BI__builtin_ia32_pternlogq256_maskz:
+return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+
   // 3DNow!
   case X86::BI__builtin_ia32_pswapdsf:
   case X86::BI__builtin_ia32_pswapdsi: {

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332891&r1=332890&r2=332891&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon May 21 13:58:23 2018
@@ -4494,37 +4494,41 @@ __m512i test_mm512_maskz_srlv_epi64(__mm
 
 __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
   // CHECK-LABEL: @test_mm512_ternarylogic_epi32
-  // CHECK: @llvm.x86.avx512.mask.pternlog.d.512
+  // CHECK: @llvm.x86.avx512.pternlog.d.512
   return _mm512_ternarylogic_epi32(__A, __B, __C, 4); 
 }
 
 __m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i 
__B, __m512i __C) {
   // CHECK-LABEL: @test_mm512_mask_ternarylogic_epi32
-  // CHECK: @llvm.x86.avx512.mask.pternlog.d.512
+  // CHECK: @llvm.x86.avx512.pternlog.d.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
   return _mm512_mask_tern

r332909 - [X86] Remove a builtin that should have been removed in r332882.

2018-05-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 21 15:10:02 2018
New Revision: 332909

URL: http://llvm.org/viewvc/llvm-project?rev=332909&view=rev
Log:
[X86] Remove a builtin that should have been removed in r332882.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/test/CodeGen/builtins-x86.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332909&r1=332908&r2=332909&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 21 15:10:02 2018
@@ -477,7 +477,6 @@ TARGET_BUILTIN(__builtin_ia32_cmpps, "V4
 TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "nc", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx")

Modified: cfe/trunk/test/CodeGen/builtins-x86.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=332909&r1=332908&r2=332909&view=diff
==
--- cfe/trunk/test/CodeGen/builtins-x86.c (original)
+++ cfe/trunk/test/CodeGen/builtins-x86.c Mon May 21 15:10:02 2018
@@ -433,7 +433,6 @@ void f0() {
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
   tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r332929 - [X86] Prevent inclusion of __wmmintrin_aes.h and __wmmintrin_pclmul.h without including wmmintrin.h

2018-05-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 21 19:02:13 2018
New Revision: 332929

URL: http://llvm.org/viewvc/llvm-project?rev=332929&view=rev
Log:
[X86] Prevent inclusion of __wmmintrin_aes.h and __wmmintrin_pclmul.h without 
including wmmintrin.h

Modified:
cfe/trunk/lib/Headers/__wmmintrin_aes.h
cfe/trunk/lib/Headers/__wmmintrin_pclmul.h

Modified: cfe/trunk/lib/Headers/__wmmintrin_aes.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_aes.h?rev=332929&r1=332928&r2=332929&view=diff
==
--- cfe/trunk/lib/Headers/__wmmintrin_aes.h (original)
+++ cfe/trunk/lib/Headers/__wmmintrin_aes.h Mon May 21 19:02:13 2018
@@ -20,11 +20,14 @@
  *
  *===---===
  */
+
+#ifndef __WMMINTRIN_H
+#error "Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead."
+#endif
+
 #ifndef __WMMINTRIN_AES_H
 #define __WMMINTRIN_AES_H
 
-#include 
-
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("aes")))
 

Modified: cfe/trunk/lib/Headers/__wmmintrin_pclmul.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_pclmul.h?rev=332929&r1=332928&r2=332929&view=diff
==
--- cfe/trunk/lib/Headers/__wmmintrin_pclmul.h (original)
+++ cfe/trunk/lib/Headers/__wmmintrin_pclmul.h Mon May 21 19:02:13 2018
@@ -20,6 +20,11 @@
  *
  *===---===
  */
+
+#ifndef __WMMINTRIN_H
+#error "Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> 
instead."
+#endif
+
 #ifndef __WMMINTRIN_PCLMUL_H
 #define __WMMINTRIN_PCLMUL_H
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333014 - [X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 11:54:19 2018
New Revision: 333014

URL: http://llvm.org/viewvc/llvm-project?rev=333014&view=rev
Log:
[X86] Move 128-bit f16c intrinsics to __emmintrin_f16c.h include from 
emmintrin.h. Move 256-bit f16c intrinsics back to f16cintrin.h

Intel documents the 128-bit versions as being in emmintrin.h and the 256-bit 
version as being in immintrin.h.

This patch makes a new __emmintrin_f16c.h to hold the 128-bit versions to be 
included from emmintrin.h. And makes the existing f16cintrin.h contain the 
256-bit versions and include it from immintrin.h with an error if it's included 
directly.

Differential Revision: https://reviews.llvm.org/D47174

Added:
cfe/trunk/lib/Headers/__emmintrin_f16c.h
  - copied, changed from r332998, cfe/trunk/lib/Headers/f16cintrin.h
Modified:
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/f16cintrin.h
cfe/trunk/lib/Headers/immintrin.h

Copied: cfe/trunk/lib/Headers/__emmintrin_f16c.h (from r332998, 
cfe/trunk/lib/Headers/f16cintrin.h)
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__emmintrin_f16c.h?p2=cfe/trunk/lib/Headers/__emmintrin_f16c.h&p1=cfe/trunk/lib/Headers/f16cintrin.h&r1=332998&r2=333014&rev=333014&view=diff
==
--- cfe/trunk/lib/Headers/f16cintrin.h (original)
+++ cfe/trunk/lib/Headers/__emmintrin_f16c.h Tue May 22 11:54:19 2018
@@ -1,4 +1,4 @@
-/*=== f16cintrin.h - F16C intrinsics ---===
+/*=== __emmintrin_f16c.h - F16C intrinsics -===
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
@@ -21,12 +21,12 @@
  *===---===
  */
 
-#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
-#error "Never use  directly; include  instead."
+#if !defined __EMMINTRIN_H
+#error "Never use <__emmintrin_f16c.h> directly; include <emmintrin.h> 
instead."
 #endif
 
-#ifndef __F16CINTRIN_H
-#define __F16CINTRIN_H
+#ifndef __EMMINTRIN_F16C_H
+#define __EMMINTRIN_F16C_H
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS \
@@ -121,4 +121,4 @@ _mm_cvtph_ps(__m128i __a)
 
 #undef __DEFAULT_FN_ATTRS
 
-#endif /* __F16CINTRIN_H */
+#endif /* __EMMINTRIN_F16C_H */

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=333014&r1=333013&r2=333014&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Tue May 22 11:54:19 2018
@@ -44,7 +44,7 @@ typedef unsigned char __v16qu __attribut
  * appear in the interface though. */
 typedef signed char __v16qs __attribute__((__vector_size__(16)));
 
-#include <f16cintrin.h>
+#include <__emmintrin_f16c.h>
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("sse2")))

Modified: cfe/trunk/lib/Headers/f16cintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/f16cintrin.h?rev=333014&r1=333013&r2=333014&view=diff
==
--- cfe/trunk/lib/Headers/f16cintrin.h (original)
+++ cfe/trunk/lib/Headers/f16cintrin.h Tue May 22 11:54:19 2018
@@ -21,8 +21,8 @@
  *===---===
  */
 
-#if !defined __X86INTRIN_H && !defined __EMMINTRIN_H && !defined __IMMINTRIN_H
-#error "Never use  directly; include  instead."
+#if !defined __IMMINTRIN_H
+#error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef __F16CINTRIN_H
@@ -32,63 +32,24 @@
 #define __DEFAULT_FN_ATTRS \
   __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
 
-/// Converts a 16-bit half-precision float value into a 32-bit float
-///value.
-///
-/// \headerfile 
-///
-/// This intrinsic corresponds to the  VCVTPH2PS  instruction.
-///
-/// \param __a
-///A 16-bit half-precision float value.
-/// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS
-_cvtsh_ss(unsigned short __a)
-{
-  __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
-  __v4sf r = __builtin_ia32_vcvtph2ps(v);
-  return r[0];
-}
-
-/// Converts a 32-bit single-precision float value to a 16-bit
-///half-precision float value.
-///
-/// \headerfile 
-///
-/// \code
-/// unsigned short _cvtss_sh(float a, const int imm);
-/// \endcode
-///
-/// This intrinsic corresponds to the  VCVTPS2PH  instruction.
-///
-/// \param a
-///A 32-bit single-precision float value to be converted to a 16-bit
-///half-precision float value.
-/// \param imm
-///An immediate value controlling rounding using bits [2:0]: \n

r333020 - [X86] Add __emmintrin_f16c.h to module map and CMakeLists.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 13:19:05 2018
New Revision: 333020

URL: http://llvm.org/viewvc/llvm-project?rev=333020&view=rev
Log:
[X86] Add __emmintrin_f16c.h to module map and CMakeLists.

I missed this in r333014

Modified:
cfe/trunk/lib/Headers/CMakeLists.txt
cfe/trunk/lib/Headers/module.modulemap

Modified: cfe/trunk/lib/Headers/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=333020&r1=333019&r2=333020&view=diff
==
--- cfe/trunk/lib/Headers/CMakeLists.txt (original)
+++ cfe/trunk/lib/Headers/CMakeLists.txt Tue May 22 13:19:05 2018
@@ -46,6 +46,7 @@ set(files
   clflushoptintrin.h
   clwbintrin.h
   emmintrin.h
+  __emmintrin_f16c.h
   f16cintrin.h
   float.h
   fma4intrin.h

Modified: cfe/trunk/lib/Headers/module.modulemap
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/module.modulemap?rev=333020&r1=333019&r2=333020&view=diff
==
--- cfe/trunk/lib/Headers/module.modulemap (original)
+++ cfe/trunk/lib/Headers/module.modulemap Tue May 22 13:19:05 2018
@@ -95,9 +95,14 @@ module _Builtin_intrinsics [system] [ext
 
 explicit module sse2 {
   export sse
+  export f16c_128
   header "emmintrin.h"
 }
 
+explicit f16c_128 {
+  header "__emmintrin_f16c.h"
+}
+
 explicit module sse3 {
   export sse2
   header "pmmintrin.h"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333023 - [X86] Add two missing #endif directives to immintrin.h that should have been in r333014.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 13:33:04 2018
New Revision: 333023

URL: http://llvm.org/viewvc/llvm-project?rev=333023&view=rev
Log:
[X86] Add two missing #endif directives to immintrin.h that should have been in 
r333014.

Modified:
cfe/trunk/lib/Headers/immintrin.h

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333023&r1=333022&r2=333023&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Tue May 22 13:33:04 2018
@@ -68,9 +68,11 @@
 
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
 #include 
+#endif
 
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
 #include 
+#endif
 
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
 #include 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333026 - [X86] Another attempt at fixing the intrinsic module map for r333014.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 13:48:20 2018
New Revision: 333026

URL: http://llvm.org/viewvc/llvm-project?rev=333026&view=rev
Log:
[X86] Another attempt at fixing the intrinsic module map for r333014.

Modified:
cfe/trunk/lib/Headers/module.modulemap

Modified: cfe/trunk/lib/Headers/module.modulemap
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/module.modulemap?rev=333026&r1=333025&r2=333026&view=diff
==
--- cfe/trunk/lib/Headers/module.modulemap (original)
+++ cfe/trunk/lib/Headers/module.modulemap Tue May 22 13:48:20 2018
@@ -99,7 +99,7 @@ module _Builtin_intrinsics [system] [ext
   header "emmintrin.h"
 }
 
-explicit f16c_128 {
+explicit module f16c_128 {
   header "__emmintrin_f16c.h"
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333027 - [X86] Remove mask argument from some builtins that are handled completely in CGBuiltin.cpp. Just wrap a select builtin around them in the header file instead.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 13:48:24 2018
New Revision: 333027

URL: http://llvm.org/viewvc/llvm-project?rev=333027&view=rev
Log:
[X86] Remove mask argument from some builtins that are handled completely in 
CGBuiltin.cpp. Just wrap a select builtin around them in the header file 
instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512cdintrin.h
cfe/trunk/lib/Headers/avx512vlcdintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333027&r1=333026&r2=333027&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 22 13:48:24 2018
@@ -1102,8 +1102,8 @@ TARGET_BUILTIN(__builtin_ia32_vpconflict
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "nc", 
"avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", 
"nc", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "nc", 
"avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "nc", 
"avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "nc", 
"avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "nc", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "nc", "avx512cd")
 
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "nc", 
"avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "nc", 
"avx512vpopcntdq,avx512vl")
@@ -1550,10 +1550,10 @@ TARGET_BUILTIN(__builtin_ia32_rcp14pd128
 TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_128_mask, "V4iV4iV4iUc", "nc", 
"avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_256_mask, "V8iV8iV8iUc", "nc", 
"avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_128_mask, "V2LLiV2LLiV2LLiUc", "nc", 
"avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_256_mask, "V4LLiV4LLiV4LLiUc", "nc", 
"avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "nc", 
"avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "nc", 
"avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "nc", 
"avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "nc", 
"avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "nc", "avx512f")
@@ -1778,7 +1778,7 @@ TARGET_BUILTIN(__builtin_ia32_kortestzhi
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64cIiV64cULLi", "nc", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc", "nc", 
"avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs", "nc", 
"avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi", "nc", 
"avx512bw")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333027&r1=333026&r2=333027&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue May 22 13:48:24 2018
@@ -8890,7 +8890,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   }
   case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256:
-  case X86::BI__builtin_ia32_palignr512_mask: {
+  case X86::BI__builtin_ia32_palignr512: {
 unsigned ShiftVal = cast(Ops[2])->getZExtValue();
 
 unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
@@ -8920,15 +8920,9 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   }
 }
 
-Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
-   makeArrayRef(Indices, NumElts),
-   "palignr");
-
-// If this isn't a masked builtin, just return the align operation.
-if (Ops.size() == 3)
-  return Align;
-
-return EmitX86Select(*this, Ops[4], Align, Ops[3])

r333033 - [X86] As mentioned in post-commit feedback in D47174, move the 128 bit f16c intrinsics into f16cintrin.h and remove __emmintrin_f16c.h

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 15:19:19 2018
New Revision: 333033

URL: http://llvm.org/viewvc/llvm-project?rev=333033&view=rev
Log:
[X86] As mentioned in post-commit feedback in D47174, move the 128 bit f16c 
intrinsics into f16cintrin.h and remove __emmintrin_f16c.h

These were included in emmintrin.h to match Intel Intrinsics Guide 
documentation. But this is because icc is capable of emulating them on targets 
that don't support F16C using library calls. Clang/LLVM doesn't have this 
emulation support. So it makes more sense to include them in immintrin.h 
instead.

I've left a comment behind to hopefully deter someone from trying to move them 
again in the future.

Removed:
cfe/trunk/lib/Headers/__emmintrin_f16c.h
Modified:
cfe/trunk/lib/Headers/CMakeLists.txt
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/f16cintrin.h
cfe/trunk/lib/Headers/module.modulemap

Modified: cfe/trunk/lib/Headers/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=333033&r1=333032&r2=333033&view=diff
==
--- cfe/trunk/lib/Headers/CMakeLists.txt (original)
+++ cfe/trunk/lib/Headers/CMakeLists.txt Tue May 22 15:19:19 2018
@@ -46,7 +46,6 @@ set(files
   clflushoptintrin.h
   clwbintrin.h
   emmintrin.h
-  __emmintrin_f16c.h
   f16cintrin.h
   float.h
   fma4intrin.h

Removed: cfe/trunk/lib/Headers/__emmintrin_f16c.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__emmintrin_f16c.h?rev=333032&view=auto
==
--- cfe/trunk/lib/Headers/__emmintrin_f16c.h (original)
+++ cfe/trunk/lib/Headers/__emmintrin_f16c.h (removed)
@@ -1,124 +0,0 @@
-/*=== __emmintrin_f16c.h - F16C intrinsics -===
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to 
deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- *===---===
- */
-
-#if !defined __EMMINTRIN_H
-#error "Never use <__emmintrin_f16c.h> directly; include  
instead."
-#endif
-
-#ifndef __EMMINTRIN_F16C_H
-#define __EMMINTRIN_F16C_H
-
-/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
-
-/// Converts a 16-bit half-precision float value into a 32-bit float
-///value.
-///
-/// \headerfile 
-///
-/// This intrinsic corresponds to the  VCVTPH2PS  instruction.
-///
-/// \param __a
-///A 16-bit half-precision float value.
-/// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS
-_cvtsh_ss(unsigned short __a)
-{
-  __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
-  __v4sf r = __builtin_ia32_vcvtph2ps(v);
-  return r[0];
-}
-
-/// Converts a 32-bit single-precision float value to a 16-bit
-///half-precision float value.
-///
-/// \headerfile 
-///
-/// \code
-/// unsigned short _cvtss_sh(float a, const int imm);
-/// \endcode
-///
-/// This intrinsic corresponds to the  VCVTPS2PH  instruction.
-///
-/// \param a
-///A 32-bit single-precision float value to be converted to a 16-bit
-///half-precision float value.
-/// \param imm
-///An immediate value controlling rounding using bits [2:0]: \n
-///000: Nearest \n
-///001: Down \n
-///010: Up \n
-///011: Truncate \n
-///1XX: Use MXCSR.RC for rounding
-/// \returns The converted 16-bit half-precision float value.
-#define _cvtss_sh(a, imm) __extension__ ({ \
-  (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
- (imm)))[0]); })
-
-/// Converts a 128-bit vector containing 32-bit float values into a
-///128-bit vector containing 16-bit half-precision float values.
-///
-/// \headerfile 
-///
-/// \code
-/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
-/// \endcode
-///
-/// T

r333062 - [X86] In the floating point max reduction intrinsics, negate infinity before feeding it to set1.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 22:51:52 2018
New Revision: 333062

URL: http://llvm.org/viewvc/llvm-project?rev=333062&view=rev
Log:
[X86] In the floating point max reduction intrinsics, negate infinity before 
feeding it to set1.

Previously we negated the whole vector after splatting infinity. But it's better
to negate the infinity before splatting. This generates IR with the negate
already folded with the infinity constant.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333062&r1=333061&r2=333062&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 22 22:51:52 2018
@@ -9956,7 +9956,7 @@ _mm512_mask_reduce_max_epu64(__mmask8 __
 
 static __inline__ double __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
-  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
+  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(-__builtin_inf()),
   max_pd, d, f, pd, __M);
 }
 
@@ -10099,7 +10099,7 @@ _mm512_mask_reduce_max_epu32(__mmask16 _
 
 static __inline__ float __DEFAULT_FN_ATTRS
 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
-  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), 
max_ps, , f,
+  _mm512_mask_reduce_maxMin_32bit(__V,_mm512_set1_ps(-__builtin_inff()), 
max_ps, , f,
   ps, __M);
 }
 

Modified: cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c?rev=333062&r1=333061&r2=333062&view=diff
==
--- cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c (original)
+++ cfe/trunk/test/CodeGen/avx512-reduceMinMaxIntrin.c Tue May 22 22:51:52 2018
@@ -564,7 +564,7 @@ unsigned long test_mm512_mask_reduce_max
 // CHECK:   store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64
 // CHECK:   [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
 // CHECK:   [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], 
align 64
-// CHECK:   store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8
+// CHECK:   store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8
 // CHECK:   [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
 // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double 
[[TMP4]], i32 0
 // CHECK:   [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
@@ -583,9 +583,8 @@ unsigned long test_mm512_mask_reduce_max
 // CHECK:   [[VECINIT7_I_I:%.*]] = insertelement <8 x double> 
[[VECINIT6_I_I]], double [[TMP11]], i32 7
 // CHECK:   store <8 x double> [[VECINIT7_I_I]], <8 x double>* 
[[_COMPOUNDLITERAL_I_I]], align 64
 // CHECK:   [[TMP12:%.*]] = load <8 x double>, <8 x double>* 
[[_COMPOUNDLITERAL_I_I]], align 64
-// CHECK:   [[SUB_I:%.*]] = fsub <8 x double> , [[TMP12]]
 // CHECK:   [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-// CHECK:   [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], 
<8 x double> [[SUB_I]]
+// CHECK:   [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], 
<8 x double> [[TMP12]]
 // CHECK:   store <8 x double> [[TMP14]], <8 x double>* [[__V_ADDR_I]], align 
64
 // CHECK:   [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], 
align 64
 // CHECK:   [[TMP16:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], 
align 64
@@ -1859,7 +1858,7 @@ unsigned int test_mm512_mask_reduce_max_
 // CHECK:   store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64
 // CHECK:   [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
 // CHECK:   [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], 
align 64
-// CHECK:   store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4
+// CHECK:   store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4
 // CHECK:   [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
 // CHECK:   [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float 
[[TMP4]], i32 0
 // CHECK:   [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
@@ -1894,9 +1893,8 @@ unsigned int test_mm512_mask_reduce_max_
 // CHECK:   [[VECINIT15_I_I:%.*]] = insertelement <16 x float> 
[[VECINIT14_I_I]], float [[TMP19]], i32 15
 // CHECK:   store <16 x float> [[VECINIT15_I_I]], <16 x float>* 
[[_COMPOUNDLITERAL_I_I]], align 64
 // CHECK:   [[TMP20:%.*]] = load <16 x float>, <16 x float>* 
[[_COMPOUNDLITERAL_I_I]], align 64
-// CHECK:   [[SUB_I:%.*]] = fsub <16 x float> , [[TMP20]]
 // CHECK:   [[TMP21:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-// CHECK:   [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x fl

r333064 - [X86] Undef the vector reduction helper macros when we're done with them.

2018-05-22 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 22 23:31:36 2018
New Revision: 333064

URL: http://llvm.org/viewvc/llvm-project?rev=333064&view=rev
Log:
[X86] Undef the vector reduction helper macros when we're done with them.

These are implementation helper macros; we shouldn't expose them to user code
if we don't need to.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333064&r1=333063&r2=333064&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue May 22 23:31:36 2018
@@ -9713,6 +9713,8 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M,
   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
 f, d, pd);
 }
+#undef _mm512_reduce_operator_64bit
+#undef _mm512_mask_reduce_operator_64bit
 
 // Vec512 - Vector with size 512.
 // Operator - Can be one of following: +,*,&,|
@@ -9840,6 +9842,8 @@ static __inline__ float __DEFAULT_FN_ATT
 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
 }
+#undef _mm512_reduce_operator_32bit
+#undef _mm512_mask_reduce_operator_32bit
 
 // Used bisection method. At each step, we partition the vector with previous
 // step in half, and the operation is performed on its two halves.
@@ -9977,6 +9981,8 @@ _mm512_mask_reduce_min_pd(__mmask8 __M,
   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
   min_pd, d, f, pd, __M);
 }
+#undef _mm512_reduce_maxMin_64bit
+#undef _mm512_mask_reduce_maxMin_64bit
 
 // Vec512 - Vector with size 512.
 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for 
example:
@@ -10120,6 +10126,8 @@ _mm512_mask_reduce_min_ps(__mmask16 __M,
   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), 
min_ps, , f,
   ps, __M);
 }
+#undef _mm512_reduce_maxMin_32bit
+#undef _mm512_mask_reduce_maxMin_32bit
 
 #undef __DEFAULT_FN_ATTRS
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333110 - [X86] Move all Intel defined intrinsic includes into immintrin.h

2018-05-23 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 23 11:32:58 2018
New Revision: 333110

URL: http://llvm.org/viewvc/llvm-project?rev=333110&view=rev
Log:
[X86] Move all Intel defined intrinsic includes into immintrin.h

This matches the Intel documentation which shows them available by importing 
immintrin.h. x86intrin.h also includes immintrin.h so anyone including 
x86intrin.h will still get them.

This differs from gcc, but I don't think we were a perfect match there to
begin with. I'm unclear what gcc's policy is for choosing which header to add
things to.

Differential Revision: https://reviews.llvm.org/D47182

Modified:
cfe/trunk/lib/Headers/cldemoteintrin.h
cfe/trunk/lib/Headers/clzerointrin.h
cfe/trunk/lib/Headers/immintrin.h
cfe/trunk/lib/Headers/movdirintrin.h
cfe/trunk/lib/Headers/pconfigintrin.h
cfe/trunk/lib/Headers/ptwriteintrin.h
cfe/trunk/lib/Headers/rdseedintrin.h
cfe/trunk/lib/Headers/sgxintrin.h
cfe/trunk/lib/Headers/waitpkgintrin.h
cfe/trunk/lib/Headers/wbnoinvdintrin.h
cfe/trunk/lib/Headers/x86intrin.h

Modified: cfe/trunk/lib/Headers/cldemoteintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cldemoteintrin.h?rev=333110&r1=333109&r2=333110&view=diff
==
--- cfe/trunk/lib/Headers/cldemoteintrin.h (original)
+++ cfe/trunk/lib/Headers/cldemoteintrin.h Wed May 23 11:32:58 2018
@@ -21,7 +21,7 @@
  *===---===
  */
 
-#ifndef __X86INTRIN_H
+#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
 #error "Never use  directly; include  instead."
 #endif
 

Modified: cfe/trunk/lib/Headers/clzerointrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clzerointrin.h?rev=333110&r1=333109&r2=333110&view=diff
==
--- cfe/trunk/lib/Headers/clzerointrin.h (original)
+++ cfe/trunk/lib/Headers/clzerointrin.h Wed May 23 11:32:58 2018
@@ -20,7 +20,7 @@
  *
  *===---===
  */
-#ifndef __X86INTRIN_H
+#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
 #error "Never use  directly; include  instead."
 #endif
 

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333110&r1=333109&r2=333110&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Wed May 23 11:32:58 2018
@@ -90,6 +90,10 @@
 #include 
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
+#include 
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
 #include 
 #endif
@@ -339,4 +343,41 @@ _writegsbase_u64(unsigned long long __V)
  * whereas others are also available at all times. */
 #include 
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+  defined(__MOVDIRI__) || defined(__MOVDIR64B__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
+#include 
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
+#include 
+#endif
+
 #endif /* __IMMINTRIN_H */

Modified: cfe/trunk/lib/Headers/movdirintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/movdirintrin.h?rev=333110&r1=333109&r2=333110&view=diff
==
--- cfe/trunk/lib/Headers/movdirintrin.h (original)
+++ cfe/trunk/lib/Headers/movdirintrin.h Wed May 23 11:32:58 2018
@@ -20,7 +20,7 @@
  *
  *===---===
  */
-#ifndef __X86INTRIN_H
+#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
 #error "Never use  directly; include  instead."
 #endif
 

Modified: cfe/trunk/lib/Headers/pconfigintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/pconfigintrin.h?rev=333110&r1=333109&r2=333110&view=diff
==
--- cfe/trunk/lib/Headers/pconfigintrin.h (original)
+++ cfe/trunk/lib/Headers/pconfigintrin.h Wed May 23 11:32:58 2018
@@ -21,7 +21,7 @@
  *===---===
  */
 
-#ifndef

r333124 - [X86] Move the include of clzerointrin.h from immintrin.h back to x86intrin.h.

2018-05-23 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 23 14:04:26 2018
New Revision: 333124

URL: http://llvm.org/viewvc/llvm-project?rev=333124&view=rev
Log:
[X86] Move the include of clzerointrin.h from immintrin.h back to x86intrin.h.

This is an AMD intrinsic, not an Intel intrinsic, so it shouldn't be in
immintrin.h.

Modified:
cfe/trunk/lib/Headers/immintrin.h
cfe/trunk/lib/Headers/x86intrin.h

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333124&r1=333123&r2=333124&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Wed May 23 14:04:26 2018
@@ -347,10 +347,6 @@ _writegsbase_u64(unsigned long long __V)
 #include 
 #endif
 
-#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
-#include 
-#endif
-
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
 #include 
 #endif

Modified: cfe/trunk/lib/Headers/x86intrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/x86intrin.h?rev=333124&r1=333123&r2=333124&view=diff
==
--- cfe/trunk/lib/Headers/x86intrin.h (original)
+++ cfe/trunk/lib/Headers/x86intrin.h Wed May 23 14:04:26 2018
@@ -60,4 +60,9 @@
 #include 
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
+#include 
+#endif
+
+
 #endif /* __X86INTRIN_H */


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333211 - [X86] Fix a bad cast in _mm512_mask_abs_epi32 and _mm512_maskz_abs_epi32.

2018-05-24 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu May 24 10:32:49 2018
New Revision: 333211

URL: http://llvm.org/viewvc/llvm-project?rev=333211&view=rev
Log:
[X86] Fix a bad cast in _mm512_mask_abs_epi32 and _mm512_maskz_abs_epi32.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333211&r1=333210&r2=333211&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Thu May 24 10:32:49 2018
@@ -1948,7 +1948,7 @@ _mm512_abs_epi32(__m512i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
-  return (__m512i)__builtin_ia32_selectd_512((__mmask8)__U,
+  return (__m512i)__builtin_ia32_selectd_512(__U,
  (__v16si)_mm512_abs_epi32(__A),
  (__v16si)__W);
 }
@@ -1956,7 +1956,7 @@ _mm512_mask_abs_epi32 (__m512i __W, __mm
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
 {
-  return (__m512i)__builtin_ia32_selectd_512((__mmask8)__U,
+  return (__m512i)__builtin_ia32_selectd_512(__U,
  (__v16si)_mm512_abs_epi32(__A),
  (__v16si)_mm512_setzero_si512());
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333318 - [X86] Mark a few more builtins const that were missed in r331814.

2018-05-25 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri May 25 15:07:43 2018
New Revision: 333318

URL: http://llvm.org/viewvc/llvm-project?rev=333318&view=rev
Log:
[X86] Mark a few more builtins const that were missed in r331814.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333318&r1=333317&r2=333318&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:07:43 2018
@@ -1237,9 +1237,9 @@ TARGET_BUILTIN(__builtin_ia32_pmaxsq256,
 TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "n", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "nc", 
"avx512vl")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333320 - [X86] Correct the target features on two avx512bw builtins that were incorrectly labeled as avx512f.

2018-05-25 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri May 25 15:43:20 2018
New Revision: 333320

URL: http://llvm.org/viewvc/llvm-project?rev=333320&view=rev
Log:
[X86] Correct the target features on two avx512bw builtins that were 
incorrectly labeled as avx512f.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333320&r1=333319&r2=333320&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:43:20 2018
@@ -1486,8 +1486,8 @@ TARGET_BUILTIN(__builtin_ia32_vcomisd, "
 TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", 
"nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", 
"nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", 
"nc", "avx512f")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333321 - [X86] Add const to another builtin that was missed from r331814.

2018-05-25 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri May 25 15:52:29 2018
New Revision: 333321

URL: http://llvm.org/viewvc/llvm-project?rev=333321&view=rev
Log:
[X86] Add const to another builtin that was missed from r331814.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333321&r1=333320&r2=333321&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri May 25 15:52:29 2018
@@ -1689,7 +1689,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovusdw12
 TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "n", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "nc", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "n", 
"avx512vl")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333348 - [X86] Remove mask from avx512ifma builtins. Use a select instruction instead.

2018-05-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sat May 26 11:55:26 2018
New Revision: 333348

URL: http://llvm.org/viewvc/llvm-project?rev=333348&view=rev
Log:
[X86] Remove mask from avx512ifma builtins. Use a select instruction instead.

This reduces from 12 builtins to 6 since we no longer need a mask and maskz 
version.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512ifmaintrin.h
cfe/trunk/lib/Headers/avx512ifmavlintrin.h
cfe/trunk/test/CodeGen/avx512ifma-builtins.c
cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333348&r1=333347&r2=333348&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat May 26 11:55:26 2018
@@ -1461,18 +1461,12 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64lo
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", 
"n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", 
"nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", 
"nc", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", 
"avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", 
"avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", 
"avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "nc", 
"avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "nc", 
"avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "nc", 
"avx512ifma,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512_mask, "V64cV64cV64cV64cULLi", 
"nc", "avx512vbmi")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varqi512_mask, "V64cV64cV64cV64cULLi", 
"nc", "avx512vbmi")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varqi512_maskz, "V64cV64cV64cV64cULLi", 
"nc", "avx512vbmi")

Modified: cfe/trunk/lib/Headers/avx512ifmaintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512ifmaintrin.h?rev=333348&r1=333347&r2=333348&view=diff
==
--- cfe/trunk/lib/Headers/avx512ifmaintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512ifmaintrin.h Sat May 26 11:55:26 2018
@@ -34,57 +34,47 @@
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
 {
-  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
-   (__v8di) __Y,
-   (__v8di) __Z,
-   (__mmask8) -1);
+  return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y,
+(__v8di) __Z);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
-  __m512i __Y)
+_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i 
__Y)
 {
-  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
-   (__v8di) __X,
-   (__v8di) __Y,
-   (__mmask8) __M);
+  return (__m512i)__builtin_ia32_selectq_512(__M,
+   (__v8di)_mm512_madd52hi_epu64(__W, __X, 
__Y),
+   (__v8di)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i 
__Z)
 {
-  r

r333387 - [X86] Merge the 3 different flavors of masked vpermi2var/vpermt2var builtins to a single version without masking. Use select builtins with appropriate operand instead.

2018-05-28 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon May 28 20:26:38 2018
New Revision: 333387

URL: http://llvm.org/viewvc/llvm-project?rev=333387&view=rev
Log:
[X86] Merge the 3 different flavors of masked vpermi2var/vpermt2var builtins to 
a single version without masking. Use select builtins with appropriate operand 
instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vbmiintrin.h
cfe/trunk/lib/Headers/avx512vbmivlintrin.h
cfe/trunk/lib/Headers/avx512vlbwintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vbmi-builtins.c
cfe/trunk/test/CodeGen/avx512vbmivl-builtin.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333387&r1=333386&r2=333387&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon May 28 20:26:38 2018
@@ -969,10 +969,6 @@ TARGET_BUILTIN(__builtin_ia32_storeupd51
 TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", 
"nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", 
"nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "nc", 
"avx512f")
 
 TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
@@ -1092,10 +1088,6 @@ TARGET_BUILTIN(__builtin_ia32_psubsw512_
 TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "nc", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "nc", 
"avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512_mask, "V32sV32sV32sV32sUi", 
"nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi512_mask, "V32sV32sV32sV32sUi", 
"nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi512_maskz, "V32sV32sV32sV32sUi", 
"nc", "avx512bw")
-
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", 
"nc", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", 
"nc", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "nc", 
"avx512cd,avx512vl")
@@ -1123,13 +1115,6 @@ TARGET_BUILTIN(__builtin_ia32_vpshufbitq
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "nc", 
"avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "nc", 
"avx512bitalg")
 
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "nc", 
"avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", 
"nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "nc", 
"avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_maskz, "V8sV8sV8sV8sUc", "nc", 
"avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", 
"nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", 
"nc", "avx512vl,avx512bw")
-
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw")
@@ -1266,30 +1251,24 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv
 TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "n", "avx512vl")
 
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128_mask, "V2dV2dV2LLiV2dUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256_mask, "V4dV4dV4LLiV4dUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps128_mask, "V4fV4fV4iV4fUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps256_mask, "V8fV8fV8iV8fUc", "nc", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", 
"nc", "avx512vl")
-TARGET

r333446 - [X86] Tag some 128/256 load/store instructions as requiring avx512vl instead of avx512f.

2018-05-29 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 29 11:23:22 2018
New Revision: 333446

URL: http://llvm.org/viewvc/llvm-project?rev=333446&view=rev
Log:
[X86] Tag some 128/256 load/store instructions as requiring avx512vl instead of 
avx512f.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333446&r1=333445&r2=333446&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 29 11:23:22 2018
@@ -1428,18 +1428,18 @@ TARGET_BUILTIN(__builtin_ia32_psraw512,
 TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "n", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", 
"n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", 
"n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", 
"n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", 
"avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", 
"avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", 
"avx512ifma,avx512vl")
@@ -1484,8 +1484,8 @@ TARGET_BUILTIN(__builtin_ia32_loadss128_
 TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", 
"avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", 
"avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "n", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "n", "avx512vl")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333497 - [X86] Fix the names of a bunch of icelake intrinsics.

2018-05-29 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 29 20:38:15 2018
New Revision: 333497

URL: http://llvm.org/viewvc/llvm-project?rev=333497&view=rev
Log:
[X86] Fix the names of a bunch of icelake intrinsics.

Mostly this fixes the names of all the 128-bit intrinsics to start with _mm_ 
instead of _mm128_ as is the convention and what the Intel docs say.

This also fixes the name of the bitshuffle intrinsics to say epi64 for 128 and 
256 bit versions.

Modified:
cfe/trunk/lib/Headers/avx512vlbitalgintrin.h
cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h
cfe/trunk/lib/Headers/avx512vlvnniintrin.h
cfe/trunk/test/CodeGen/avx512vlbitalg-builtins.c
cfe/trunk/test/CodeGen/avx512vlvbmi2-builtins.c
cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c

Modified: cfe/trunk/lib/Headers/avx512vlbitalgintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbitalgintrin.h?rev=333497&r1=333496&r2=333497&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlbitalgintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlbitalgintrin.h Tue May 29 20:38:15 2018
@@ -54,23 +54,23 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U,
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_popcnt_epi16(__m128i __A)
+_mm_popcnt_epi16(__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
+_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
 {
   return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
-  (__v8hi) _mm128_popcnt_epi16(__B),
+  (__v8hi) _mm_popcnt_epi16(__B),
   (__v8hi) __A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
+_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
 {
-  return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
+  return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
   __U,
   __B);
 }
@@ -98,29 +98,29 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U,
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_popcnt_epi8(__m128i __A)
+_mm_popcnt_epi8(__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
+_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
 {
   return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
-  (__v16qi) _mm128_popcnt_epi8(__B),
+  (__v16qi) _mm_popcnt_epi8(__B),
   (__v16qi) __A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
+_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
 {
-  return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
+  return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
   __U,
   __B);
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
+_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
   (__v32qi) __B,
@@ -128,15 +128,15 @@ _mm256_mask_bitshuffle_epi32_mask(__mmas
 }
 
 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
+_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B)
 {
-  return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
+  return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1,
   __A,
   __B);
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
+_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
   (__v16qi) __B,
@@ -144,9 +144,9 @@ _mm128_mask_bitshuffle_epi16_mask(__mmas
 }
 
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
+_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
 {
-  return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
+  return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1,
   __A,
   __B);
 }

Modified: cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h?rev=333497&r1=333496&r2=333497&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h Tue May 29 20:38:15 2018
@@ -31,13 +31,8 @@
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target_

r333509 - [X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.

2018-05-29 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue May 29 22:26:04 2018
New Revision: 333509

URL: http://llvm.org/viewvc/llvm-project?rev=333509&view=rev
Log:
[X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512vlvnniintrin.h
cfe/trunk/lib/Headers/avx512vnniintrin.h
cfe/trunk/test/CodeGen/avx512vlvnni-builtins.c
cfe/trunk/test/CodeGen/avx512vnni-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=333509&r1=333508&r2=333509&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue May 29 22:26:04 2018
@@ -970,30 +970,18 @@ TARGET_BUILTIN(__builtin_ia32_storeapd51
 TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f")
 
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "nc", 
"avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "nc", 
"avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", 
"avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", 
"avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", 
"avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", 
"avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", 
"avx512vnni")
 
 TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", 
"avx512vl")

Modified: cfe/trunk/lib/Headers/avx512vlvnniintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlvnniintrin.h?rev=333509&r1=333508&r2=333509&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlvnniintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512

r315470 - [X86] Correct type for argument to clflushopt intrinsic.

2017-10-11 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Oct 11 09:06:08 2017
New Revision: 315470

URL: http://llvm.org/viewvc/llvm-project?rev=315470&view=rev
Log:
[X86] Correct type for argument to clflushopt intrinsic.

Summary: According to Intel docs this should take void const *. We had char*. 
The lack of const is the main issue.

Reviewers: RKSimon, zvi, igorb

Reviewed By: igorb

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D38782

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/clflushoptintrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=315470&r1=315469&r2=315470&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Oct 11 09:06:08 2017
@@ -639,7 +639,7 @@ TARGET_BUILTIN(__builtin_ia32_xsavec, "v
 TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "", "xsaves")
 
 //CLFLUSHOPT
-TARGET_BUILTIN(__builtin_ia32_clflushopt, "vc*", "", "clflushopt")
+TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "", "clflushopt")
 
 // ADX
 TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx")

Modified: cfe/trunk/lib/Headers/clflushoptintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clflushoptintrin.h?rev=315470&r1=315469&r2=315470&view=diff
==
--- cfe/trunk/lib/Headers/clflushoptintrin.h (original)
+++ cfe/trunk/lib/Headers/clflushoptintrin.h Wed Oct 11 09:06:08 2017
@@ -32,7 +32,7 @@
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__,  
__target__("clflushopt")))
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_clflushopt(char * __m) {
+_mm_clflushopt(void const * __m) {
   __builtin_ia32_clflushopt(__m);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r315517 - [X86] Add support for 'amdfam17h' to __builtin_cpu_is to match gcc.

2017-10-11 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Oct 11 14:42:02 2017
New Revision: 315517

URL: http://llvm.org/viewvc/llvm-project?rev=315517&view=rev
Log:
[X86] Add support for 'amdfam17h' to __builtin_cpu_is to match gcc.

The compiler-rt implementation already supported it, it just wasn't exposed.

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/target-builtin-noerror.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=315517&r1=315516&r2=315517&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Oct 11 14:42:02 2017
@@ -1293,6 +1293,7 @@ bool X86TargetInfo::validateCpuIs(String
   .Case("amd", true)
   .Case("amdfam10h", true)
   .Case("amdfam15h", true)
+  .Case("amdfam17h", true)
   .Case("atom", true)
   .Case("barcelona", true)
   .Case("bdver1", true)

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315517&r1=315516&r2=315517&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Oct 11 14:42:02 2017
@@ -7501,6 +7501,7 @@ Value *CodeGenFunction::EmitX86CpuIs(Str
 INTEL_KNL,
 AMD_BTVER1,
 AMD_BTVER2,
+AMDFAM17H,
 CPU_SUBTYPE_START,
 INTEL_COREI7_NEHALEM,
 INTEL_COREI7_WESTMERE,
@@ -7527,6 +7528,7 @@ Value *CodeGenFunction::EmitX86CpuIs(Str
   .Case("amdfam10", AMDFAM10H)
   .Case("amdfam15h", AMDFAM15H)
   .Case("amdfam15", AMDFAM15H)
+  .Case("amdfam17h", AMDFAM17H)
   .Case("atom", INTEL_BONNELL)
   .Case("barcelona", AMDFAM10H_BARCELONA)
   .Case("bdver1", AMDFAM15H_BDVER1)

Modified: cfe/trunk/test/CodeGen/target-builtin-noerror.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-builtin-noerror.c?rev=315517&r1=315516&r2=315517&view=diff
==
--- cfe/trunk/test/CodeGen/target-builtin-noerror.c (original)
+++ cfe/trunk/test/CodeGen/target-builtin-noerror.c Wed Oct 11 14:42:02 2017
@@ -81,6 +81,7 @@ void verifycpustrings() {
   (void)__builtin_cpu_is("amd");
   (void)__builtin_cpu_is("amdfam10h");
   (void)__builtin_cpu_is("amdfam15h");
+  (void)__builtin_cpu_is("amdfam17h");
   (void)__builtin_cpu_is("atom");
   (void)__builtin_cpu_is("barcelona");
   (void)__builtin_cpu_is("bdver1");


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r315547 - [X86] Remove a few unnecessary check lines from the predefined-arch-macros test.

2017-10-11 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Oct 11 19:06:17 2017
New Revision: 315547

URL: http://llvm.org/viewvc/llvm-project?rev=315547&view=rev
Log:
[X86] Remove a few unnecessary check lines from the predefined-arch-macros test.

These were testing OS macros and clang/llvm macros.

Modified:
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315547&r1=315546&r2=315547&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Oct 11 19:06:17 
2017
@@ -1011,20 +1011,12 @@
 // CHECK_GLM_M32: #define __XSAVEOPT__ 1
 // CHECK_GLM_M32: #define __XSAVES__ 1
 // CHECK_GLM_M32: #define __XSAVE__ 1
-// CHECK_GLM_M32: #define __clang__ 1
 // CHECK_GLM_M32: #define __goldmont 1
 // CHECK_GLM_M32: #define __goldmont__ 1
 // CHECK_GLM_M32: #define __i386 1
 // CHECK_GLM_M32: #define __i386__ 1
-// CHECK_GLM_M32: #define __linux 1
-// CHECK_GLM_M32: #define __linux__ 1
-// CHECK_GLM_M32: #define __llvm__ 1
 // CHECK_GLM_M32: #define __tune_goldmont__ 1
-// CHECK_GLM_M32: #define __unix 1
-// CHECK_GLM_M32: #define __unix__ 1
 // CHECK_GLM_M32: #define i386 1
-// CHECK_GLM_M32: #define linux 1
-// CHECK_GLM_M32: #define unix 1
 //
 // RUN: %clang -march=goldmont -m64 -E -dM %s -o - 2>&1 \
 // RUN: -target i386-unknown-linux \
@@ -1049,19 +1041,11 @@
 // CHECK_GLM_M64: #define __XSAVEOPT__ 1
 // CHECK_GLM_M64: #define __XSAVES__ 1
 // CHECK_GLM_M64: #define __XSAVE__ 1
-// CHECK_GLM_M64: #define __gnu_linux__ 1
 // CHECK_GLM_M64: #define __goldmont 1
 // CHECK_GLM_M64: #define __goldmont__ 1
-// CHECK_GLM_M64: #define __linux 1
-// CHECK_GLM_M64: #define __linux__ 1
-// CHECK_GLM_M64: #define __llvm__ 1
 // CHECK_GLM_M64: #define __tune_goldmont__ 1
-// CHECK_GLM_M64: #define __unix 1
-// CHECK_GLM_M64: #define __unix__ 1
 // CHECK_GLM_M64: #define __x86_64 1
 // CHECK_GLM_M64: #define __x86_64__ 1
-// CHECK_GLM_M64: #define linux 1
-// CHECK_GLM_M64: #define unix 1
 //
 // RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \
 // RUN: -target i386-unknown-linux \


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r315594 - [X86] Use -ffreestanding instead of using the mm_malloc.h include guard hack on more of the builtin tests.

2017-10-12 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Oct 12 10:21:01 2017
New Revision: 315594

URL: http://llvm.org/viewvc/llvm-project?rev=315594&view=rev
Log:
[X86] Use -ffreestanding instead of using the mm_malloc.h include guard hack on 
more of the builtin tests.

Modified:
cfe/trunk/test/CodeGen/adc-builtins.c
cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c
cfe/trunk/test/CodeGen/builtin-clflushopt.c
cfe/trunk/test/CodeGen/builtin-clzero.c

Modified: cfe/trunk/test/CodeGen/adc-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/adc-builtins.c?rev=315594&r1=315593&r2=315594&view=diff
==
--- cfe/trunk/test/CodeGen/adc-builtins.c (original)
+++ cfe/trunk/test/CodeGen/adc-builtins.c Thu Oct 12 10:21:01 2017
@@ -1,6 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
-
-#define __MM_MALLOC_H
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -emit-llvm -o 
- %s | FileCheck %s
 
 #include 
 

Modified: cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c?rev=315594&r1=315593&r2=315594&view=diff
==
--- cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512ifmavl-builtins.c Thu Oct 12 10:21:01 2017
@@ -1,6 +1,4 @@
-// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512ifma 
-target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
-
-#define __MM_MALLOC_H
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin 
-target-feature +avx512ifma -target-feature +avx512vl -emit-llvm -o - -Wall 
-Werror | FileCheck %s
 
 #include 
 

Modified: cfe/trunk/test/CodeGen/builtin-clflushopt.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clflushopt.c?rev=315594&r1=315593&r2=315594&view=diff
==
--- cfe/trunk/test/CodeGen/builtin-clflushopt.c (original)
+++ cfe/trunk/test/CodeGen/builtin-clflushopt.c Thu Oct 12 10:21:01 2017
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clflushopt  
-emit-llvm -o - -Wall -Werror | FileCheck %s
-#define __MM_MALLOC_H
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin 
-target-feature +clflushopt  -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include 
 
-#include 
 void test_mm_clflushopt(char * __m) {
   //CHECK-LABEL: @test_mm_clflushopt
   //CHECK: @llvm.x86.clflushopt

Modified: cfe/trunk/test/CodeGen/builtin-clzero.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clzero.c?rev=315594&r1=315593&r2=315594&view=diff
==
--- cfe/trunk/test/CodeGen/builtin-clzero.c (original)
+++ cfe/trunk/test/CodeGen/builtin-clzero.c Thu Oct 12 10:21:01 2017
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clzero  
-emit-llvm -o - -Wall -Werror | FileCheck %s
-#define __MM_MALLOC_H
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-apple-darwin 
-target-feature +clzero  -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include 
+
 void test_mm_clzero(void * __m) {
   //CHECK-LABEL: @test_mm_clzero
   //CHECK: @llvm.x86.clzero


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r315607 - [X86] Add CLWB intrinsic. clang part

2017-10-12 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Oct 12 11:57:15 2017
New Revision: 315607

URL: http://llvm.org/viewvc/llvm-project?rev=315607&view=rev
Log:
[X86] Add CLWB intrinsic. clang part

Reviewers: RKSimon, zvi, igorb

Reviewed By: RKSimon

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D38781

Added:
cfe/trunk/lib/Headers/clwbintrin.h
cfe/trunk/test/CodeGen/builtin-clwb.c
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/CMakeLists.txt
cfe/trunk/lib/Headers/immintrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=315607&r1=315606&r2=315607&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Oct 12 11:57:15 2017
@@ -641,6 +641,9 @@ TARGET_BUILTIN(__builtin_ia32_xsaves, "v
 //CLFLUSHOPT
 TARGET_BUILTIN(__builtin_ia32_clflushopt, "vvC*", "", "clflushopt")
 
+//CLWB
+TARGET_BUILTIN(__builtin_ia32_clwb, "vvC*", "", "clwb")
+
 // ADX
 TARGET_BUILTIN(__builtin_ia32_addcarryx_u32, "UcUcUiUiUi*", "", "adx")
 TARGET_BUILTIN(__builtin_ia32_addcarry_u32, "UcUcUiUiUi*", "", "")

Modified: cfe/trunk/lib/Headers/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=315607&r1=315606&r2=315607&view=diff
==
--- cfe/trunk/lib/Headers/CMakeLists.txt (original)
+++ cfe/trunk/lib/Headers/CMakeLists.txt Thu Oct 12 11:57:15 2017
@@ -33,6 +33,7 @@ set(files
   clzerointrin.h
   cpuid.h
   clflushoptintrin.h
+  clwbintrin.h
   emmintrin.h
   f16cintrin.h
   float.h

Added: cfe/trunk/lib/Headers/clwbintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/clwbintrin.h?rev=315607&view=auto
==
--- cfe/trunk/lib/Headers/clwbintrin.h (added)
+++ cfe/trunk/lib/Headers/clwbintrin.h Thu Oct 12 11:57:15 2017
@@ -0,0 +1,52 @@
+/*=== clwbintrin.h - CLWB intrinsic ===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===---===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __CLWBINTRIN_H
+#define __CLWBINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__,  
__target__("clwb")))
+
+/// \brief Writes back to memory the cache line (if modified) that contains the
+/// linear address specified in \a __p from any level of the cache hierarchy in
+/// the cache coherence domain
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> CLWB </c> instruction.
+///
+/// \param __p
+///A pointer to the memory location used to identify the cache line to be
+///written back.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_clwb(void const *__p) {
+  __builtin_ia32_clwb(__p);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=315607&r1=315606&r2=315607&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Thu Oct 12 11:57:15 2017
@@ -58,6 +58,10 @@
 #include 
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
+#include <clwbintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
 #include 
 #endif

Added: cfe/trunk/test/CodeGen/builtin-clwb.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-clwb.c?rev=315607&view=auto
===

r315723 - [X86] Add skeleton support for knm cpu

2017-10-13 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Oct 13 11:14:24 2017
New Revision: 315723

URL: http://llvm.org/viewvc/llvm-project?rev=315723&view=rev
Log:
[X86] Add skeleton support for knm cpu

This adds support for the Knights Mill CPU. Preprocessor defines match gcc's 
implementation.

Differential Revision: https://reviews.llvm.org/D38813

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/lib/Basic/Targets/X86.h
cfe/trunk/test/Driver/x86-march.c
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=315723&r1=315722&r2=315723&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Fri Oct 13 11:14:24 2017
@@ -229,6 +229,8 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "cx16", true);
 break;
 
+  case CK_KNM:
+// TODO: Add avx5124fmaps/avx5124vnniw.
   case CK_KNL:
 setFeatureEnabledImpl(Features, "avx512f", true);
 setFeatureEnabledImpl(Features, "avx512cd", true);
@@ -853,6 +855,8 @@ void X86TargetInfo::getTargetDefines(con
   case CK_KNL:
 defineCPUMacros(Builder, "knl");
 break;
+  case CK_KNM:
+break;
   case CK_Lakemont:
 Builder.defineMacro("__tune_lakemont__");
 break;
@@ -1553,6 +1557,7 @@ X86TargetInfo::CPUKind X86TargetInfo::ge
   .Cases("skylake-avx512", "skx", CK_SkylakeServer)
   .Case("cannonlake", CK_Cannonlake)
   .Case("knl", CK_KNL)
+  .Case("knm", CK_KNM)
   .Case("lakemont", CK_Lakemont)
   .Case("k6", CK_K6)
   .Case("k6-2", CK_K6_2)

Modified: cfe/trunk/lib/Basic/Targets/X86.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.h?rev=315723&r1=315722&r2=315723&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.h (original)
+++ cfe/trunk/lib/Basic/Targets/X86.h Fri Oct 13 11:14:24 2017
@@ -203,6 +203,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetI
 /// Knights Landing processor.
 CK_KNL,
 
+/// \name Knights Mill
+/// Knights Mill processor.
+CK_KNM,
+
 /// \name Lakemont
 /// Lakemont microarchitecture based processors.
 CK_Lakemont,
@@ -321,6 +325,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetI
 case CK_SkylakeServer:
 case CK_Cannonlake:
 case CK_KNL:
+case CK_KNM:
 case CK_K8:
 case CK_K8SSE3:
 case CK_AMDFAM10:

Modified: cfe/trunk/test/Driver/x86-march.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/x86-march.c?rev=315723&r1=315722&r2=315723&view=diff
==
--- cfe/trunk/test/Driver/x86-march.c (original)
+++ cfe/trunk/test/Driver/x86-march.c Fri Oct 13 11:14:24 2017
@@ -52,6 +52,10 @@
 // RUN:   | FileCheck %s -check-prefix=knl
 // knl: "-target-cpu" "knl"
 //
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=knm 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=knm
+// knm: "-target-cpu" "knm"
+//
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=cannonlake 
2>&1 \
 // RUN:   | FileCheck %s -check-prefix=cannonlake
 // cannonlake: "-target-cpu" "cannonlake"

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315723&r1=315722&r2=315723&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Fri Oct 13 11:14:24 
2017
@@ -783,6 +783,81 @@
 // CHECK_KNL_M64: #define __tune_knl__ 1
 // CHECK_KNL_M64: #define __x86_64 1
 // CHECK_KNL_M64: #define __x86_64__ 1
+
+// RUN: %clang -march=knm -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M32
+// CHECK_KNM_M32: #define __AES__ 1
+// CHECK_KNM_M32: #define __AVX2__ 1
+// CHECK_KNM_M32: #define __AVX512CD__ 1
+// CHECK_KNM_M32: #define __AVX512ER__ 1
+// CHECK_KNM_M32: #define __AVX512F__ 1
+// CHECK_KNM_M32: #define __AVX512PF__ 1
+// CHECK_KNM_M32: #define __AVX__ 1
+// CHECK_KNM_M32: #define __BMI2__ 1
+// CHECK_KNM_M32: #define __BMI__ 1
+// CHECK_KNM_M32: #define __F16C__ 1
+// CHECK_KNM_M32: #define __FMA__ 1
+// CHECK_KNM_M32: #define __LZCNT__ 1
+// CHECK_KNM_M32: #define __MMX__ 1
+// CHECK_KNM_M32: #define __PCLMUL__ 1
+// CHECK_KNM_M32: #define __POPCNT__ 1
+// CHECK_KNM_M32: #define __PREFETCHWT1__ 1
+// CHECK_KNM_M32: #define __RDRND__ 1
+// CHECK_KNM_M32: #define __RTM__ 1
+// CHECK_KNM_M32: #define __SSE2__ 1
+// CHECK_KNM_M32: #define __SSE3__ 1
+// CHECK_KNM_M32: #define __SSE4_1__ 1
+// CHECK_KNM_M32: #define __SSE4_2__ 1
+// CHECK_KNM_M32: #define __SSE__ 1
+// CHECK_K

r315729 - [X86] Remove 'knm' defines from predefined-arch-macros.c test.

2017-10-13 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Oct 13 11:38:10 2017
New Revision: 315729

URL: http://llvm.org/viewvc/llvm-project?rev=315729&view=rev
Log:
[X86] Remove 'knm' defines from predefined-arch-macros.c test.

Direction seems to be that we don't want to keep adding these, but I forgot to 
remove it from the test before I committed r315723.

Modified:
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=315729&r1=315728&r2=315729&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Fri Oct 13 11:38:10 
2017
@@ -815,9 +815,6 @@
 // CHECK_KNM_M32: #define __XSAVE__ 1
 // CHECK_KNM_M32: #define __i386 1
 // CHECK_KNM_M32: #define __i386__ 1
-// CHECK_KNM_M32: #define __knm 1
-// CHECK_KNM_M32: #define __knm__ 1
-// CHECK_KNM_M32: #define __tune_knm__ 1
 // CHECK_KNM_M32: #define i386 1
 
 // RUN: %clang -march=knm -m64 -E -dM %s -o - 2>&1 \
@@ -853,9 +850,6 @@
 // CHECK_KNM_M64: #define __XSAVE__ 1
 // CHECK_KNM_M64: #define __amd64 1
 // CHECK_KNM_M64: #define __amd64__ 1
-// CHECK_KNM_M64: #define __knm 1
-// CHECK_KNM_M64: #define __knm__ 1
-// CHECK_KNM_M64: #define __tune_knm__ 1
 // CHECK_KNM_M64: #define __x86_64 1
 // CHECK_KNM_M64: #define __x86_64__ 1
 //


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r340713 - [X86] Don't set min_vector_width to 512 on intrinsics that only operate on k registers.

2018-08-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun Aug 26 22:27:15 2018
New Revision: 340713

URL: http://llvm.org/viewvc/llvm-project?rev=340713&view=rev
Log:
[X86] Don't set min_vector_width to 512 on intrinsics that only operate on k 
registers.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=340713&r1=340712&r2=340713&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Aug 26 22:27:15 2018
@@ -175,6 +175,7 @@ typedef enum
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, 
__target__("avx512f"), __min_vector_width__(512)))
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, 
__target__("avx512f"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("avx512f")))
 
 /* Create vectors with repeated elements */
 
@@ -508,13 +509,13 @@ _mm512_castsi512_si256 (__m512i __A)
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_int2mask(int __a)
 {
   return (__mmask16)__a;
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_mask2int(__mmask16 __a)
 {
   return (int)__a;
@@ -4580,7 +4581,7 @@ _mm512_store_epi64 (void *__P, __m512i _
 
 /* Mask ops */
 
-static __inline __mmask16 __DEFAULT_FN_ATTRS512
+static __inline __mmask16 __DEFAULT_FN_ATTRS
 _mm512_knot(__mmask16 __M)
 {
   return __builtin_ia32_knothi(__M);
@@ -5622,7 +5623,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m12
   (__v4sf)_mm_setzero_ps(), \
   (__mmask8)(U), (int)(R))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kmov (__mmask16 __A)
 {
   return  __A;
@@ -8320,49 +8321,49 @@ _mm512_mask_permutexvar_epi32 (__m512i _
 
 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kand (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kandn (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
 {
   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS512
+static __inline__ int __DEFAULT_FN_ATTRS
 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
 {
   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) 
__B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
 _mm512_kxor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r340714 - [X86] Undef __DEFAULT_FN_ATTRS in avx512fintrin.h.

2018-08-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun Aug 26 22:44:45 2018
New Revision: 340714

URL: http://llvm.org/viewvc/llvm-project?rev=340714&view=rev
Log:
[X86] Undef __DEFAULT_FN_ATTRS in avx512fintrin.h.

Fixes test failure after r340713

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=340714&r1=340713&r2=340714&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun Aug 26 22:44:45 2018
@@ -9595,5 +9595,6 @@ _mm512_mask_reduce_min_ps(__mmask16 __M,
 
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX512FINTRIN_H */


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r340717 - [X86] Rename __DEFAULT_FN_ATTRS to __DEFAULT_FN_ATTRS512 in avx512dqintrin.h and avx512bwintrin.h.

2018-08-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun Aug 26 23:20:19 2018
New Revision: 340717

URL: http://llvm.org/viewvc/llvm-project?rev=340717&view=rev
Log:
[X86] Rename __DEFAULT_FN_ATTRS to __DEFAULT_FN_ATTRS512 in avx512dqintrin.h 
and avx512bwintrin.h.

This is preparation for removing min_vector_width 512 from some 
intrinsics.

Modified:
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340717&r1=340716&r2=340717&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun Aug 26 23:20:19 2018
@@ -32,7 +32,7 @@ typedef unsigned int __mmask32;
 typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("avx512bw"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, 
__target__("avx512bw"), __min_vector_width__(512)))
 
 /* Integer compare */
 
@@ -176,102 +176,102 @@ typedef unsigned long long __mmask64;
 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A + (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_add_epi8(__A, 
__B),
  (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_add_epi8(__A, 
__B),
  (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v64qu) __A - (__v64qu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_sub_epi8(__A, 
__B),
  (__v64qi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_sub_epi8(__A, 
__B),
  (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A + (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
  (__v32hi)_mm512_add_epi16(__A, 
__B),
  (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
  (__v32hi)_mm512_add_epi16(__A, 
__B),
  (__v32hi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v32hu) __A - (__v32hu) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
  (__v32hi)_mm512_sub_epi16(__A, 
__B),
  (__v32hi)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i 

r340719 - [X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers.

2018-08-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun Aug 26 23:20:22 2018
New Revision: 340719

URL: http://llvm.org/viewvc/llvm-project?rev=340719&view=rev
Log:
[X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit 
mask registers.

This also adds a second intrinsic name for the 16-bit mask versions.

These intrinsics match gcc and icc. They just aren't published in the Intel 
Intrinsics Guide so I only recently found they existed.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340719&r1=340718&r2=340719&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun Aug 26 23:20:22 2018
@@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfd
 TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", 
"avx512pf")
 TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", 
"avx512pf")
 
+TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", 
"avx512vl")
@@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps5
 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", 
"avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", 
"avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", 
"avx512bw,avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340719&r1=340718&r2=340719&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Aug 26 23:20:22 2018
@@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeG
 }
 
 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps 
Opc,
-  unsigned NumElts, ArrayRef Ops,
+  ArrayRef Ops,
   bool InvertLHS = false) {
+  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
   Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
   Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
 
@@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 
   case X8

r340718 - [X86] Remove min_vector_width 512 from some intrinsics that operate only on k-registers.

2018-08-26 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sun Aug 26 23:20:20 2018
New Revision: 340718

URL: http://llvm.org/viewvc/llvm-project?rev=340718&view=rev
Log:
[X86] Remove min_vector_width 512 from some intrinsics that operate only on 
k-registers.

Modified:
cfe/trunk/lib/Headers/avx512bwintrin.h

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340718&r1=340717&r2=340718&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Sun Aug 26 23:20:20 2018
@@ -33,6 +33,7 @@ typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, 
__target__("avx512bw"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("avx512bw")))
 
 /* Integer compare */
 
@@ -1792,5 +1793,6 @@ _mm512_sad_epu8 (__m512i __A, __m512i __
 }
 
 #undef __DEFAULT_FN_ATTRS512
+#undef __DEFAULT_FN_ATTRS
 
 #endif


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r340798 - [X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic names for 16 bit masks.

2018-08-27 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon Aug 27 23:28:25 2018
New Revision: 340798

URL: http://llvm.org/viewvc/llvm-project?rev=340798&view=rev
Log:
[X86] Add kortest intrinsics for 8, 32, and 64 bit masks. Add new intrinsic 
names for 16 bit masks.

This matches gcc and icc despite not being documented in the Intel Intrinsics 
Guide.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340798&r1=340797&r2=340798&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Aug 27 23:28:25 2018
@@ -1749,8 +1749,14 @@ TARGET_BUILTIN(__builtin_ia32_korqi, "Uc
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340798&r1=340797&r2=340798&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Aug 27 23:28:25 2018
@@ -10012,14 +10012,21 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return EmitX86MaskedCompare(*this, CC, false, Ops);
   }
 
+  case X86::BI__builtin_ia32_kortestcqi:
   case X86::BI__builtin_ia32_kortestchi:
-  case X86::BI__builtin_ia32_kortestzhi: {
+  case X86::BI__builtin_ia32_kortestcsi:
+  case X86::BI__builtin_ia32_kortestcdi: {
 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
-Value *C;
-if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
-  C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
-else
-  C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
+Value *Cmp = Builder.CreateICmpEQ(Or, C);
+return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+  }
+  case X86::BI__builtin_ia32_kortestzqi:
+  case X86::BI__builtin_ia32_kortestzhi:
+  case X86::BI__builtin_ia32_kortestzsi:
+  case X86::BI__builtin_ia32_kortestzdi: {
+Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
 Value *Cmp = Builder.CreateICmpEQ(Or, C);
 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
   }

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340798&r1=340797&r2=340798&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Mon Aug 27 23:28:25 2018
@@ -107,6 +107,42 @@ _kxor_mask64(__mmask64 __A, __mmask64 __
   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
 }
 
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+  return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
+  *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
+  return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
+}
+
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
+{
+  return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
+}
+
+st

r340879 - [X86] Add kadd intrinsics to match gcc and icc.

2018-08-28 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue Aug 28 15:32:14 2018
New Revision: 340879

URL: http://llvm.org/viewvc/llvm-project?rev=340879&view=rev
Log:
[X86] Add kadd intrinsics to match gcc and icc.

This adds the following intrinsics:
_kadd_mask64
_kadd_mask32
_kadd_mask16
_kadd_mask8

These are missing from the Intel Intrinsics Guide, but are implemented by both 
gcc and icc.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=340879&r1=340878&r2=340879&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Aug 28 15:32:14 2018
@@ -1737,6 +1737,10 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps5
 TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", 
"avx512dq")
 TARGET_BUILTIN(__builtin_ia32_fpcla_mask, "UcV4fIiUc", "ncV:128:", 
"avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "ULLiULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=340879&r1=340878&r2=340879&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Aug 28 15:32:14 2018
@@ -8613,7 +8613,7 @@ static Value *EmitX86MaskLogic(CodeGenFu
 LHS = CGF.Builder.CreateNot(LHS);
 
   return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
-  CGF.Builder.getIntNTy(std::max(NumElts, 
8U)));
+   Ops[0]->getType());
 }
 
 static Value *EmitX86Select(CodeGenFunction &CGF,
@@ -10031,6 +10031,34 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
   }
 
+  case X86::BI__builtin_ia32_kaddqi:
+  case X86::BI__builtin_ia32_kaddhi:
+  case X86::BI__builtin_ia32_kaddsi:
+  case X86::BI__builtin_ia32_kadddi: {
+Intrinsic::ID IID;
+switch (BuiltinID) {
+default: llvm_unreachable("Unsupported intrinsic!");
+case X86::BI__builtin_ia32_kaddqi:
+  IID = Intrinsic::x86_avx512_kadd_b;
+  break;
+case X86::BI__builtin_ia32_kaddhi:
+  IID = Intrinsic::x86_avx512_kadd_w;
+  break;
+case X86::BI__builtin_ia32_kaddsi:
+  IID = Intrinsic::x86_avx512_kadd_d;
+  break;
+case X86::BI__builtin_ia32_kadddi:
+  IID = Intrinsic::x86_avx512_kadd_q;
+  break;
+}
+
+unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+Function *Intr = CGM.getIntrinsic(IID);
+Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
+return Builder.CreateBitCast(Res, Ops[0]->getType());
+  }
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
   case X86::BI__builtin_ia32_kandsi:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=340879&r1=340878&r2=340879&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Tue Aug 28 15:32:14 2018
@@ -143,6 +143,18 @@ _kortest_mask64_u8(__mmask64 __A, __mmas
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_kadd_mask32(__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_kadd_mask64(__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
+}
+
 /* Integer compare */
 
 #define _mm512_cmp_epi8_mask(a, b, p) \

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=340879&r1=340878&r2=340879&view=diff
==

r341234 - [X86] Add kshift intrinsics to match gcc and icc.

2018-08-31 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Aug 31 11:22:52 2018
New Revision: 341234

URL: http://llvm.org/viewvc/llvm-project?rev=341234&view=rev
Log:
[X86] Add kshift intrinsics to match gcc and icc.

This adds the following intrinsics:
_kshiftli_mask8
_kshiftli_mask16
_kshiftli_mask32
_kshiftli_mask64
_kshiftri_mask8
_kshiftri_mask16
_kshiftri_mask32
_kshiftri_mask64

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341234&r1=341233&r2=341234&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 11:22:52 2018
@@ -1770,6 +1770,14 @@ TARGET_BUILTIN(__builtin_ia32_kxorqi, "U
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "ULLiULLiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", 
"avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", 
"avx512bw,avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341234&r1=341233&r2=341234&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 11:22:52 2018
@@ -9929,6 +9929,50 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 "psrldq");
 return Builder.CreateBitCast(SV, ResultType, "cast");
   }
+  case X86::BI__builtin_ia32_kshiftliqi:
+  case X86::BI__builtin_ia32_kshiftlihi:
+  case X86::BI__builtin_ia32_kshiftlisi:
+  case X86::BI__builtin_ia32_kshiftlidi: {
+unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff;
+unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+if (ShiftVal >= NumElts)
+  return llvm::Constant::getNullValue(Ops[0]->getType());
+
+Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+uint32_t Indices[64];
+for (unsigned i = 0; i != NumElts; ++i)
+  Indices[i] = NumElts + i - ShiftVal;
+
+Value *Zero = llvm::Constant::getNullValue(In->getType());
+Value *SV = Builder.CreateShuffleVector(Zero, In,
+makeArrayRef(Indices, NumElts),
+"kshiftl");
+return Builder.CreateBitCast(SV, Ops[0]->getType());
+  }
+  case X86::BI__builtin_ia32_kshiftriqi:
+  case X86::BI__builtin_ia32_kshiftrihi:
+  case X86::BI__builtin_ia32_kshiftrisi:
+  case X86::BI__builtin_ia32_kshiftridi: {
+unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff;
+unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+if (ShiftVal >= NumElts)
+  return llvm::Constant::getNullValue(Ops[0]->getType());
+
+Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+uint32_t Indices[64];
+for (unsigned i = 0; i != NumElts; ++i)
+  Indices[i] = i + ShiftVal;
+
+Value *Zero = llvm::Constant::getNullValue(In->getType());
+Value *SV = Builder.CreateShuffleVector(In, Zero,
+makeArrayRef(Indices, NumElts),
+"kshiftr");
+return Builder.CreateBitCast(SV, Ops[0]->getType());
+  }
   case X86::BI__builtin_ia32_movnti:
   case X86::BI__builtin_ia32_movnti64:
   case X86::BI__builtin_ia32_movntsd:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341234&r1=341233&r2=341234&view=diff
==
--- cfe/trunk/lib/Headers

r341251 - [X86] Add k-mask conversion and load/store intrinsics to match gcc and icc.

2018-08-31 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Aug 31 13:41:06 2018
New Revision: 341251

URL: http://llvm.org/viewvc/llvm-project?rev=341251&view=rev
Log:
[X86] Add k-mask conversion and load/store intrinsics to match gcc and icc.

This adds:
_cvtmask8_u32, _cvtmask16_u32, _cvtmask32_u32, _cvtmask64_u64
_cvtu32_mask8, _cvtu32_mask16, _cvtu32_mask32, _cvtu64_mask64
_load_mask8, _load_mask16, _load_mask32, _load_mask64
_store_mask8, _store_mask16, _store_mask32, _store_mask64

These are currently missing from the Intel Intrinsics Guide webpage.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341251&r1=341250&r2=341251&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 13:41:06 2018
@@ -1778,6 +1778,10 @@ TARGET_BUILTIN(__builtin_ia32_kshiftriqi
 TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "ULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", 
"avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", 
"avx512bw,avx512vl")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341251&r1=341250&r2=341251&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 13:41:06 2018
@@ -10137,6 +10137,17 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return Builder.CreateBitCast(Builder.CreateNot(Res),
  Ops[0]->getType());
   }
+  case X86::BI__builtin_ia32_kmovb:
+  case X86::BI__builtin_ia32_kmovw:
+  case X86::BI__builtin_ia32_kmovd:
+  case X86::BI__builtin_ia32_kmovq: {
+// Bitcast to vXi1 type and then back to integer. This gets the mask
+// register type into the IR, but might be optimized out depending on
+// what's around it.
+unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+return Builder.CreateBitCast(Res, Ops[0]->getType());
+  }
 
   case X86::BI__builtin_ia32_kunpckdi:
   case X86::BI__builtin_ia32_kunpcksi:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341251&r1=341250&r2=341251&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Fri Aug 31 13:41:06 2018
@@ -167,6 +167,46 @@ _kadd_mask64(__mmask64 __A, __mmask64 __
 #define _kshiftri_mask64(A, I) \
   (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))
 
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_cvtmask32_u32(__mmask32 __A) {
+  return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
+}
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_cvtmask64_u64(__mmask64 __A) {
+  return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_cvtu32_mask32(unsigned int __A) {
+  return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_cvtu64_mask64(unsigned long long __A) {
+  return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+_load_mask32(__mmask32 *__A) {
+  return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
+}
+
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
+_load_mask64(__mmask64 *__A) {
+  return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_store_mask32(__mmask32 *__A, __mmask32 __B) {
+  *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_store_mask64(__mmask64 *__A, __mmask64 __B) {
+  *(__mmask64 *)__A = __builtin_ia32

r341265 - [X86] Add ktest intrinsics to match gcc and icc.

2018-08-31 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Aug 31 15:29:56 2018
New Revision: 341265

URL: http://llvm.org/viewvc/llvm-project?rev=341265&view=rev
Log:
[X86] Add ktest intrinsics to match gcc and icc.

These aren't documented in the Intel Intrinsics Guide, but are supported by gcc 
and icc.

Includes these intrinsics:
_ktestc_mask8_u8, _ktestz_mask8_u8, _ktest_mask8_u8
_ktestc_mask16_u8, _ktestz_mask16_u8, _ktest_mask16_u8
_ktestc_mask32_u8, _ktestz_mask32_u8, _ktest_mask32_u8
_ktestc_mask64_u8, _ktestz_mask64_u8, _ktest_mask64_u8

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=341265&r1=341264&r2=341265&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Aug 31 15:29:56 2018
@@ -1761,6 +1761,14 @@ TARGET_BUILTIN(__builtin_ia32_kortestcsi
 TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341265&r1=341264&r2=341265&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Aug 31 15:29:56 2018
@@ -10075,6 +10075,50 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
   }
 
+  case X86::BI__builtin_ia32_ktestcqi:
+  case X86::BI__builtin_ia32_ktestzqi:
+  case X86::BI__builtin_ia32_ktestchi:
+  case X86::BI__builtin_ia32_ktestzhi:
+  case X86::BI__builtin_ia32_ktestcsi:
+  case X86::BI__builtin_ia32_ktestzsi:
+  case X86::BI__builtin_ia32_ktestcdi:
+  case X86::BI__builtin_ia32_ktestzdi: {
+Intrinsic::ID IID;
+switch (BuiltinID) {
+default: llvm_unreachable("Unsupported intrinsic!");
+case X86::BI__builtin_ia32_ktestcqi:
+  IID = Intrinsic::x86_avx512_ktestc_b;
+  break;
+case X86::BI__builtin_ia32_ktestzqi:
+  IID = Intrinsic::x86_avx512_ktestz_b;
+  break;
+case X86::BI__builtin_ia32_ktestchi:
+  IID = Intrinsic::x86_avx512_ktestc_w;
+  break;
+case X86::BI__builtin_ia32_ktestzhi:
+  IID = Intrinsic::x86_avx512_ktestz_w;
+  break;
+case X86::BI__builtin_ia32_ktestcsi:
+  IID = Intrinsic::x86_avx512_ktestc_d;
+  break;
+case X86::BI__builtin_ia32_ktestzsi:
+  IID = Intrinsic::x86_avx512_ktestz_d;
+  break;
+case X86::BI__builtin_ia32_ktestcdi:
+  IID = Intrinsic::x86_avx512_ktestc_q;
+  break;
+case X86::BI__builtin_ia32_ktestzdi:
+  IID = Intrinsic::x86_avx512_ktestz_q;
+  break;
+}
+
+unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+Function *Intr = CGM.getIntrinsic(IID);
+return Builder.CreateCall(Intr, {LHS, RHS});
+  }
+
   case X86::BI__builtin_ia32_kaddqi:
   case X86::BI__builtin_ia32_kaddhi:
   case X86::BI__builtin_ia32_kaddsi:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=341265&r1=341264&r2=341265&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Fri Aug 31 15:29:56 2018
@@ -143,6 +143,42 @@ _kortest_mask64_u8(__mmask64 __A, __mmas
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
+_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
+{
+  return 

r341678 - [X86] Modify addcarry/subborrow builtins to emit a 2-result intrinsic and a store instruction.

2018-09-07 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Sep  7 09:58:57 2018
New Revision: 341678

URL: http://llvm.org/viewvc/llvm-project?rev=341678&view=rev
Log:
[X86] Modify addcarry/subborrow builtins to emit a 2-result intrinsic and 
a store instruction.

This is the clang side of D51769. The llvm intrinsics now return two results 
instead of using an out parameter.

Differential Revision: https://reviews.llvm.org/D51771

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/adc-builtins.c
cfe/trunk/test/CodeGen/adx-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341678&r1=341677&r2=341678&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Sep  7 09:58:57 2018
@@ -10405,6 +10405,41 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   Ops[0]);
 return Builder.CreateExtractValue(Call, 1);
   }
+  case X86::BI__builtin_ia32_addcarryx_u32:
+  case X86::BI__builtin_ia32_addcarryx_u64:
+  case X86::BI__builtin_ia32_addcarry_u32:
+  case X86::BI__builtin_ia32_addcarry_u64:
+  case X86::BI__builtin_ia32_subborrow_u32:
+  case X86::BI__builtin_ia32_subborrow_u64: {
+Intrinsic::ID IID;
+switch (BuiltinID) {
+default: llvm_unreachable("Unsupported intrinsic!");
+case X86::BI__builtin_ia32_addcarryx_u32:
+  IID = Intrinsic::x86_addcarryx_u32;
+  break;
+case X86::BI__builtin_ia32_addcarryx_u64:
+  IID = Intrinsic::x86_addcarryx_u64;
+  break;
+case X86::BI__builtin_ia32_addcarry_u32:
+  IID = Intrinsic::x86_addcarry_u32;
+  break;
+case X86::BI__builtin_ia32_addcarry_u64:
+  IID = Intrinsic::x86_addcarry_u64;
+  break;
+case X86::BI__builtin_ia32_subborrow_u32:
+  IID = Intrinsic::x86_subborrow_u32;
+  break;
+case X86::BI__builtin_ia32_subborrow_u64:
+  IID = Intrinsic::x86_subborrow_u64;
+  break;
+}
+
+Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
+ { Ops[0], Ops[1], Ops[2] });
+Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+  Ops[3]);
+return Builder.CreateExtractValue(Call, 0);
+  }
 
   case X86::BI__builtin_ia32_fpclassps128_mask:
   case X86::BI__builtin_ia32_fpclassps256_mask:

Modified: cfe/trunk/test/CodeGen/adc-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/adc-builtins.c?rev=341678&r1=341677&r2=341678&view=diff
==
--- cfe/trunk/test/CodeGen/adc-builtins.c (original)
+++ cfe/trunk/test/CodeGen/adc-builtins.c Fri Sep  7 09:58:57 2018
@@ -5,7 +5,10 @@
 unsigned char test_addcarry_u32(unsigned char __cf, unsigned int __x,
 unsigned int __y, unsigned int *__p) {
 // CHECK-LABEL: test_addcarry_u32
-// CHECK: call i8 @llvm.x86.addcarry.u32
+// CHECK: [[ADC:%.*]] = call { i8, i32 } @llvm.x86.addcarry.u32
+// CHECK: [[DATA:%.*]] = extractvalue { i8, i32 } [[ADC]], 1
+// CHECK: store i32 [[DATA]], i32* %{{.*}}
+// CHECK: [[CF:%.*]] = extractvalue { i8, i32 } [[ADC]], 0
   return _addcarry_u32(__cf, __x, __y, __p);
 }
 
@@ -13,14 +16,20 @@ unsigned char test_addcarry_u64(unsigned
 unsigned long long __y,
 unsigned long long *__p) {
 // CHECK-LABEL: test_addcarry_u64
-// CHECK: call i8 @llvm.x86.addcarry.u64
+// CHECK: [[ADC:%.*]] = call { i8, i64 } @llvm.x86.addcarry.u64
+// CHECK: [[DATA:%.*]] = extractvalue { i8, i64 } [[ADC]], 1
+// CHECK: store i64 [[DATA]], i64* %{{.*}}
+// CHECK: [[CF:%.*]] = extractvalue { i8, i64 } [[ADC]], 0
   return _addcarry_u64(__cf, __x, __y, __p);
 }
 
 unsigned char test_subborrow_u32(unsigned char __cf, unsigned int __x,
  unsigned int __y, unsigned int *__p) {
 // CHECK-LABEL: test_subborrow_u32
-// CHECK: call i8 @llvm.x86.subborrow.u32
+// CHECK: [[SBB:%.*]] = call { i8, i32 } @llvm.x86.subborrow.u32
+// CHECK: [[DATA:%.*]] = extractvalue { i8, i32 } [[SBB]], 1
+// CHECK: store i32 [[DATA]], i32* %{{.*}}
+// CHECK: [[CF:%.*]] = extractvalue { i8, i32 } [[SBB]], 0
   return _subborrow_u32(__cf, __x, __y, __p);
 }
 
@@ -28,6 +37,9 @@ unsigned char test_subborrow_u64(unsigne
  unsigned long long __y,
  unsigned long long *__p) {
 // CHECK-LABEL: test_subborrow_u64
-// CHECK: call i8 @llvm.x86.subborrow.u64
+// CHECK: [[SBB:%.*]] = call { i8, i64 } @llvm.x86.subborrow.u64
+// CHECK: [[DATA:%.*]] = extractvalue { i8, i64 } [[SBB]], 1
+// CHECK: store i64 [[DATA]], i64* %{{.*}}
+// CHECK: [[CF:%.*]] = extractvalue { i8, i64 } [[SBB]], 0
   return _subborrow_u64(__cf, __x, __y, __p);
 }

Modified: cfe/trunk/test/CodeGen/adx-built

r341699 - [X86] Custom emit __builtin_rdtscp so we can emit an explicit store for the out parameter

2018-09-07 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Sep  7 12:14:24 2018
New Revision: 341699

URL: http://llvm.org/viewvc/llvm-project?rev=341699&view=rev
Log:
[X86] Custom emit __builtin_rdtscp so we can emit an explicit store for the out 
parameter

This is the clang side of D51803. The llvm intrinsic now returns two results. 
So we need to emit an explicit store in IR for the out parameter. This is 
similar to addcarry/subborrow/rdrand/rdseed.

Differential Revision: https://reviews.llvm.org/D51805

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/rd-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=341699&r1=341698&r2=341699&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Sep  7 12:14:24 2018
@@ -9158,6 +9158,12 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__rdtsc: {
 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
   }
+  case X86::BI__builtin_ia32_rdtscp: {
+Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
+Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+  Ops[0]);
+return Builder.CreateExtractValue(Call, 0);
+  }
   case X86::BI__builtin_ia32_undef128:
   case X86::BI__builtin_ia32_undef256:
   case X86::BI__builtin_ia32_undef512:

Modified: cfe/trunk/test/CodeGen/rd-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/rd-builtins.c?rev=341699&r1=341698&r2=341699&view=diff
==
--- cfe/trunk/test/CodeGen/rd-builtins.c (original)
+++ cfe/trunk/test/CodeGen/rd-builtins.c Fri Sep  7 12:14:24 2018
@@ -14,3 +14,12 @@ int test_rdtsc() {
 // CHECK: @test_rdtsc
 // CHECK: call i64 @llvm.x86.rdtsc
 }
+
+unsigned long long test_rdtscp(unsigned int *a) {
+// CHECK: @test_rdtscp
+// CHECK: [[RDTSCP:%.*]] = call { i64, i32 } @llvm.x86.rdtscp
+// CHECK: [[TSC_AUX:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 1
+// CHECK: store i32 [[TSC_AUX]], i32* %{{.*}}
+// CHECK: [[TSC:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 0
+  return __rdtscp(a);
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r324647 - [X86] Replace kortest intrinsics with native IR.

2018-02-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Feb  8 12:16:17 2018
New Revision: 324647

URL: http://llvm.org/viewvc/llvm-project?rev=324647&view=rev
Log:
[X86] Replace kortest intrinsics with native IR.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324647&r1=324646&r2=324647&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Feb  8 12:16:17 2018
@@ -8710,6 +8710,18 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return EmitX86MaskedCompare(*this, CC, false, Ops);
   }
 
+  case X86::BI__builtin_ia32_kortestchi:
+  case X86::BI__builtin_ia32_kortestzhi: {
+Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+Value *C;
+if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
+  C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
+else
+  C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+Value *Cmp = Builder.CreateICmpEQ(Or, C);
+return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+  }
+
   case X86::BI__builtin_ia32_kandhi:
 return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
   case X86::BI__builtin_ia32_kandnhi:

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324647&r1=324646&r2=324647&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Feb  8 12:16:17 2018
@@ -6247,16 +6247,28 @@ __mmask16 test_mm512_kor(__m512i __A, __
   __E, __F);
 }
 
-int test_mm512_kortestc(__mmask16 __A, __mmask16 __B) {
+int test_mm512_kortestc(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
   // CHECK-LABEL: @test_mm512_kortestc
-  // CHECK: @llvm.x86.avx512.kortestc.w
-  return _mm512_kortestc(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+  // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], -1
+  // CHECK: zext i1 [[CMP]] to i32
+  return _mm512_kortestc(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
 }
 
-int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
+int test_mm512_kortestz(__m512i __A, __m512i __B, __m512i __C, __m512i __D) {
   // CHECK-LABEL: @test_mm512_kortestz
-  // CHECK: @llvm.x86.avx512.kortestz.w
-  return _mm512_kortestz(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[OR:%.*]] = or <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: [[CAST:%.*]] = bitcast <16 x i1> [[OR]] to i16
+  // CHECK: [[CMP:%.*]] = icmp eq i16 [[CAST]], 0
+  // CHECK: zext i1 [[CMP]] to i32
+  return _mm512_kortestz(_mm512_cmpneq_epu32_mask(__A, __B),
+ _mm512_cmpneq_epu32_mask(__C, __D));
 }
 
 __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i 
__D, __m512i __E, __m512i __F) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r324828 - [X86] Change the signature of the AVX512 packed fp compare intrinsics to return vXi1 mask. Make bitcasts to scalar explicit in IR

2018-02-10 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sat Feb 10 15:34:27 2018
New Revision: 324828

URL: http://llvm.org/viewvc/llvm-project?rev=324828&view=rev
Log:
[X86] Change the signature of the AVX512 packed fp compare intrinsics to return 
vXi1 mask. Make bitcasts to scalar explicit in IR

Summary: This is the clang equivalent of r324827

Reviewers: zvi, delena, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43143

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324828&r1=324827&r2=324828&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Feb 10 15:34:27 2018
@@ -8060,6 +8060,29 @@ static Value *EmitX86Select(CodeGenFunct
   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
 }
 
+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+ unsigned NumElts, Value *MaskIn) {
+  if (MaskIn) {
+const auto *C = dyn_cast(MaskIn);
+if (!C || !C->isAllOnesValue())
+  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+  }
+
+  if (NumElts < 8) {
+uint32_t Indices[8];
+for (unsigned i = 0; i != NumElts; ++i)
+  Indices[i] = i;
+for (unsigned i = NumElts; i != 8; ++i)
+  Indices[i] = i % NumElts + NumElts;
+Cmp = CGF.Builder.CreateShuffleVector(
+Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+  }
+
+  return CGF.Builder.CreateBitCast(Cmp,
+   IntegerType::get(CGF.getLLVMContext(),
+std::max(NumElts, 8U)));
+}
+
 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
bool Signed, ArrayRef Ops) {
   assert((Ops.size() == 2 || Ops.size() == 4) &&
@@ -8087,24 +8110,11 @@ static Value *EmitX86MaskedCompare(CodeG
 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
   }
 
-  if (Ops.size() == 4) {
-const auto *C = dyn_cast(Ops[3]);
-if (!C || !C->isAllOnesValue())
-  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts));
-  }
+  Value *MaskIn = nullptr;
+  if (Ops.size() == 4)
+MaskIn = Ops[3];
 
-  if (NumElts < 8) {
-uint32_t Indices[8];
-for (unsigned i = 0; i != NumElts; ++i)
-  Indices[i] = i;
-for (unsigned i = NumElts; i != 8; ++i)
-  Indices[i] = i % NumElts + NumElts;
-Cmp = CGF.Builder.CreateShuffleVector(
-Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
-  }
-  return CGF.Builder.CreateBitCast(Cmp,
-   IntegerType::get(CGF.getLLVMContext(),
-std::max(NumElts, 8U)));
+  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
 }
 
 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
@@ -8882,6 +8892,43 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return Builder.CreateExtractValue(Call, 1);
   }
 
+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+Value *MaskIn = Ops[3];
+Ops.erase(&Ops[3]);
+
+Intrinsic::ID ID;
+switch (BuiltinID) {
+default: llvm_unreachable("Unsupported intrinsic!");
+case X86::BI__builtin_ia32_cmpps128_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+  break;
+case X86::BI__builtin_ia32_cmpps256_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+  break;
+case X86::BI__builtin_ia32_cmpps512_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+  break;
+case X86::BI__builtin_ia32_cmppd128_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+  break;
+case X86::BI__builtin_ia32_cmppd256_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+  break;
+case X86::BI__builtin_ia32_cmppd512_mask:
+  ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+  break;
+}
+
+Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+  }
+
   // SSE packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324828&r1=324827&r2=324828&view=diff
==

r324954 - [X86] Reverse the operand order of the implementation of the kunpack builtins.

2018-02-12 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon Feb 12 14:38:52 2018
New Revision: 324954

URL: http://llvm.org/viewvc/llvm-project?rev=324954&view=rev
Log:
[X86] Reverse the operand order of the implementation of the kunpack builtins.

The second operand needs to be in the lower bits of the concatenation. This 
matches llvm 5.0, gcc, and icc behavior.

Fixes PR36360.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324954&r1=324953&r2=324954&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Feb 12 14:38:52 2018
@@ -8846,7 +8846,8 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 RHS = Builder.CreateShuffleVector(RHS, RHS,
   makeArrayRef(Indices, NumElts / 2));
 // Concat the vectors.
-Value *Res = Builder.CreateShuffleVector(LHS, RHS,
+// NOTE: Operands are swapped to match the intrinsic definition.
+Value *Res = Builder.CreateShuffleVector(RHS, LHS,
  makeArrayRef(Indices, NumElts));
 return Builder.CreateBitCast(Res, Ops[0]->getType());
   }

Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=324954&r1=324953&r2=324954&view=diff
==
--- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Mon Feb 12 14:38:52 2018
@@ -1632,7 +1632,7 @@ __mmask64 test_mm512_kunpackd(__m512i __
   // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
   // CHECK: [[LHS2:%.*]] = shufflevector <64 x i1> [[LHS]], <64 x i1> [[LHS]], 
<32 x i32> 
   // CHECK: [[RHS2:%.*]] = shufflevector <64 x i1> [[RHS]], <64 x i1> [[RHS]], 
<32 x i32> 
-  // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[LHS2]], <32 x i1> 
[[RHS2]], <64 x i32> 
+  // CHECK: [[CONCAT:%.*]] = shufflevector <32 x i1> [[RHS2]], <32 x i1> 
[[LHS2]], <64 x i32> 
   // CHECK: bitcast <64 x i1> [[CONCAT]] to i64
   return 
_mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, 
__A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); 
 }
@@ -1643,7 +1643,7 @@ __mmask32 test_mm512_kunpackw(__m512i __
   // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
   // CHECK: [[LHS2:%.*]] = shufflevector <32 x i1> [[LHS]], <32 x i1> [[LHS]], 
<16 x i32> 
   // CHECK: [[RHS2:%.*]] = shufflevector <32 x i1> [[RHS]], <32 x i1> [[RHS]], 
<16 x i32> 
-  // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[LHS2]], <16 x i1> 
[[RHS2]], <32 x i32> 
+  // CHECK: [[CONCAT:%.*]] = shufflevector <16 x i1> [[RHS2]], <16 x i1> 
[[LHS2]], <32 x i32> 
   return 
_mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, 
__A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=324954&r1=324953&r2=324954&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Feb 12 14:38:52 2018
@@ -6281,7 +6281,7 @@ __mmask16 test_mm512_kunpackb(__m512i __
   // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
   // CHECK: [[LHS2:%.*]] = shufflevector <16 x i1> [[LHS]], <16 x i1> [[LHS]], 
<8 x i32> 
   // CHECK: [[RHS2:%.*]] = shufflevector <16 x i1> [[RHS]], <16 x i1> [[RHS]], 
<8 x i32> 
-  // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[LHS2]], <8 x i1> 
[[RHS2]], <16 x i32> 
+  // CHECK: [[CONCAT:%.*]] = shufflevector <8 x i1> [[RHS2]], <8 x i1> 
[[LHS2]], <16 x i32> 
   // CHECK: bitcast <16 x i1> [[CONCAT]] to i16
   return 
_mm512_mask_cmpneq_epu32_mask(_mm512_kunpackb(_mm512_cmpneq_epu32_mask(__A, 
__B),

_mm512_cmpneq_epu32_mask(__C, __D)),


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r325560 - [X86] Remove mask from 512 bit pmulhrsw/pmulhw/pmulhuw builtins.

2018-02-19 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon Feb 19 23:28:18 2018
New Revision: 325560

URL: http://llvm.org/viewvc/llvm-project?rev=325560&view=rev
Log:
[X86] Remove mask from 512 bit pmulhrsw/pmulhw/pmulhuw builtins.

We now use a vselect node in IR around an unmasked builtin. This makes it 
consistent with the 128 and 256 bit versions.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=325560&r1=325559&r2=325560&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Feb 19 23:28:18 2018
@@ -1138,9 +1138,9 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2var
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_mask, "V16sV16sV16sV16sUs", "", 
"avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_vpermt2varhi256_maskz, "V16sV16sV16sV16sUs", "", 
"avx512vl,avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw512_mask, "V32sV32sV32sV32sUi", "", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw512_mask, "V32sV32sV32sV32sUi", "", 
"avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhw512_mask, "V32sV32sV32sV32sUi", "", 
"avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "", 
"avx512f")

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=325560&r1=325559&r2=325560&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Mon Feb 19 23:28:18 2018
@@ -1008,87 +1008,70 @@ _mm512_maskz_permutex2var_epi16 (__mmask
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
+_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-(__v32hi) __B,
-(__v32hi) _mm512_setzero_hi(),
-(__mmask32) -1);
+  return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
-__m512i __B)
+_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-(__v32hi) __B,
-(__v32hi) __W,
-(__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_mulhrs_epi16(__A, 
__B),
+ (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
-(__v32hi) __B,
-(__v32hi) _mm512_setzero_hi(),
-(__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_mulhrs_epi16(__A, 
__B),
+ (__v32hi)_mm512_setzero_hi());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
+_mm512_mulhi_epi16(__m512i __A, __m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-  (__v32hi) __B,
-  (__v32hi) _mm512_setzero_hi(),
-  (__mmask32) -1);
+  return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
__m512i __B)
 {
-  return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
-  (__v32hi) __B,
-  (__v32hi) __W,
-  (__mmask32) __U);
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+  (__v32hi)_mm512_mulhi_epi16(__A, 
__B),
+  (__v32hi)__W);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
 {
-  return (__m512i

r325655 - [X86] Disable CLWB in Cannon Lake

2018-02-20 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue Feb 20 16:16:50 2018
New Revision: 325655

URL: http://llvm.org/viewvc/llvm-project?rev=325655&view=rev
Log:
[X86] Disable CLWB in Cannon Lake

Cannon Lake does not support CLWB, therefore it
does not include all features listed under SKX.

Patch by Gabor Buella

Differential Revision: https://reviews.llvm.org/D43459

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=325655&r1=325654&r2=325655&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Tue Feb 20 16:16:50 2018
@@ -175,7 +175,8 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "avx512bw", true);
 setFeatureEnabledImpl(Features, "avx512vl", true);
 setFeatureEnabledImpl(Features, "pku", true);
-setFeatureEnabledImpl(Features, "clwb", true);
+if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB
+  setFeatureEnabledImpl(Features, "clwb", true);
 LLVM_FALLTHROUGH;
   case CK_SkylakeClient:
 setFeatureEnabledImpl(Features, "xsavec", true);

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=325655&r1=325654&r2=325655&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Tue Feb 20 16:16:50 
2018
@@ -974,7 +974,7 @@
 // CHECK_CNL_M32: #define __BMI2__ 1
 // CHECK_CNL_M32: #define __BMI__ 1
 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1
-// CHECK_CNL_M32: #define __CLWB__ 1
+// CHECK_CNL_M32-NOT: #define __CLWB__ 1
 // CHECK_CNL_M32: #define __F16C__ 1
 // CHECK_CNL_M32: #define __FMA__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
@@ -1022,7 +1022,7 @@
 // CHECK_CNL_M64: #define __BMI2__ 1
 // CHECK_CNL_M64: #define __BMI__ 1
 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1
-// CHECK_CNL_M64: #define __CLWB__ 1
+// CHECK_CNL_M64-NOT: #define __CLWB__ 1
 // CHECK_CNL_M64: #define __F16C__ 1
 // CHECK_CNL_M64: #define __FMA__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r326022 - [X86] Remove __builtin_ia32_permvarsf256_mask and __builtin_ia32_permvarsi256_mask and use the avx2 unmasked versions and a select instead.

2018-02-23 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Feb 23 22:46:42 2018
New Revision: 326022

URL: http://llvm.org/viewvc/llvm-project?rev=326022&view=rev
Log:
[X86] Remove __builtin_ia32_permvarsf256_mask and 
__builtin_ia32_permvarsi256_mask and use the avx2 unmasked versions and a 
select instead.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=326022&r1=326021&r2=326022&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Feb 23 22:46:42 2018
@@ -1799,8 +1799,6 @@ TARGET_BUILTIN(__builtin_ia32_permvarhi1
 TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, 
"V16sV16sV16sV16sUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, 
"V4dV4dV4LLiV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, 
"V4LLiV4LLiV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, 
"V8fV8fV8iV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, 
"V8iV8iV8iV8iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, 
"UcV2dIiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, 
"UcV4dIiUc","","avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, 
"UcV4fIiUc","","avx512dq,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=326022&r1=326021&r2=326022&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Fri Feb 23 22:46:42 2018
@@ -8178,60 +8178,41 @@ _mm256_mask_permutexvar_epi64 (__m256i _
  __M);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
-  __m256 __Y)
-{
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-(__v8si) __X,
-(__v8sf) __W,
-(__mmask8) __U);
-}
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
 {
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-(__v8si) __X,
-(__v8sf) _mm256_setzero_ps (),
-(__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+(__v8sf)_mm256_permutexvar_ps(__X, 
__Y),
+(__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
+_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
 {
-  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
-(__v8si) __X,
-(__v8sf) _mm256_undefined_si256 (),
-(__mmask8) -1);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+(__v8sf)_mm256_permutexvar_ps(__X, 
__Y),
+(__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) _mm256_setzero_si256 (),
- __M);
-}
+#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
- __m256i __Y)
+_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
+  __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) __W,
- (__mmask8) __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_permutexvar_epi32(__X, 
__Y),
+ (__v8si)__W);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
+_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
 {
-  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) _mm256_undefined_si256(),
- (__mmask8) -1);
+  return (__m256i)__builtin_ia32_select

r326039 - [X86] Remove some masked cvt builtins that can be replaced with legacy sse/avx builtins and a select.

2018-02-24 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sat Feb 24 10:55:13 2018
New Revision: 326039

URL: http://llvm.org/viewvc/llvm-project?rev=326039&view=rev
Log:
[X86] Remove some masked cvt builtins that can be replaced with legacy sse/avx 
builtins and a select.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=326039&r1=326038&r2=326039&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Feb 24 10:55:13 2018
@@ -1195,26 +1195,15 @@ TARGET_BUILTIN(__builtin_ia32_compressst
 TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256_mask, "V8fV8iV8fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256_mask, "V4fV4dV4fUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd128_mask, "V2dV4fV2dUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd256_mask, "V4dV4fV4dUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256_mask, "V4iV4dV4iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "", 
"avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq128_mask, "V4iV4fV4iUc", "", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256_mask, "V8iV8fV8iUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "", 
"avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtudq2ps128_mask, "V4fV4iV4fUc", "", "avx512vl")

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=326039&r1=326038&r2=326039&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Sat Feb 24 10:55:13 2018
@@ -1785,32 +1785,30 @@ _mm256_maskz_cvtepi32_pd (__mmask8 __U,
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-   (__v4sf) __W,
-   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_cvtepi32_ps(__A),
+ (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
-  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
-   (__v4sf)
-   _mm_setzero_ps (),
-   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+ (__v4sf)_mm_cvtepi32_ps(__A),
+ (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-   (__v8sf) __W,
-   (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_cvtepi32_ps(__A),
+ (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
-  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
-   (__v8sf)
-   _mm256_setzero_ps

r304326 - [TableGen] Clang changes to support Record::getValueAsString and getValueAsListOfStrings returning StringRef instead of std::string

2017-05-31 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 31 14:01:22 2017
New Revision: 304326

URL: http://llvm.org/viewvc/llvm-project?rev=304326&view=rev
Log:
[TableGen] Clang changes to support Record::getValueAsString and 
getValueAsListOfStrings returning StringRef instead of std::string

This is the clang version of D33710.

Differential Revision: https://reviews.llvm.org/D33711

Modified:
cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp
cfe/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp
cfe/trunk/utils/TableGen/ClangOptionDocEmitter.cpp
cfe/trunk/utils/TableGen/ClangSACheckersEmitter.cpp

Modified: cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp?rev=304326&r1=304325&r2=304326&view=diff
==
--- cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/ClangAttrEmitter.cpp Wed May 31 14:01:22 2017
@@ -718,9 +718,9 @@ namespace {
   };
 
   // Unique the enums, but maintain the original declaration ordering.
-  std::vector
-  uniqueEnumsInOrder(const std::vector &enums) {
-std::vector uniques;
+  std::vector
+  uniqueEnumsInOrder(const std::vector &enums) {
+std::vector uniques;
 SmallDenseSet unique_set;
 for (const auto &i : enums) {
   if (unique_set.insert(i).second)
@@ -731,7 +731,8 @@ namespace {
 
   class EnumArgument : public Argument {
 std::string type;
-std::vector values, enums, uniques;
+std::vector values, enums, uniques;
+
   public:
 EnumArgument(const Record &Arg, StringRef Attr)
   : Argument(Arg, Attr), type(Arg.getValueAsString("Type")),
@@ -850,7 +851,7 @@ namespace {
   
   class VariadicEnumArgument: public VariadicArgument {
 std::string type, QualifiedTypeName;
-std::vector values, enums, uniques;
+std::vector values, enums, uniques;
 
   protected:
 void writeValueImpl(raw_ostream &OS) const override {
@@ -1591,8 +1592,9 @@ struct AttributeSubjectMatchRule {
   }
 
   std::string getEnumValueName() const {
-std::string Result =
-"SubjectMatchRule_" + MetaSubject->getValueAsString("Name");
+SmallString<128> Result;
+Result += "SubjectMatchRule_";
+Result += MetaSubject->getValueAsString("Name");
 if (isSubRule()) {
   Result += "_";
   if (isNegatedSubRule())
@@ -1601,7 +1603,7 @@ struct AttributeSubjectMatchRule {
 }
 if (isAbstractRule())
   Result += "_abstract";
-return Result;
+return Result.str();
   }
 
   std::string getEnumValue() const { return "attr::" + getEnumValueName(); }
@@ -2603,7 +2605,7 @@ void EmitClangAttrPCHWrite(RecordKeeper
 // append a unique suffix to distinguish this set of target checks from other
 // TargetSpecificAttr records.
 static void GenerateTargetSpecificAttrChecks(const Record *R,
- std::vector &Arches,
+ std::vector &Arches,
  std::string &Test,
  std::string *FnName) {
   // It is assumed that there will be an llvm::Triple object
@@ -2613,8 +2615,9 @@ static void GenerateTargetSpecificAttrCh
   Test += "(";
 
   for (auto I = Arches.begin(), E = Arches.end(); I != E; ++I) {
-std::string Part = *I;
-Test += "T.getArch() == llvm::Triple::" + Part;
+StringRef Part = *I;
+Test += "T.getArch() == llvm::Triple::";
+Test += Part;
 if (I + 1 != E)
   Test += " || ";
 if (FnName)
@@ -2627,11 +2630,12 @@ static void GenerateTargetSpecificAttrCh
 // We know that there was at least one arch test, so we need to and in the
 // OS tests.
 Test += " && (";
-std::vector OSes = R->getValueAsListOfStrings("OSes");
+std::vector OSes = R->getValueAsListOfStrings("OSes");
 for (auto I = OSes.begin(), E = OSes.end(); I != E; ++I) {
-  std::string Part = *I;
+  StringRef Part = *I;
 
-  Test += "T.getOS() == llvm::Triple::" + Part;
+  Test += "T.getOS() == llvm::Triple::";
+  Test += Part;
   if (I + 1 != E)
 Test += " || ";
   if (FnName)
@@ -2643,10 +2647,11 @@ static void GenerateTargetSpecificAttrCh
   // If one or more CXX ABIs are specified, check those as well.
   if (!R->isValueUnset("CXXABIs")) {
 Test += " && (";
-std::vector CXXABIs = R->getValueAsListOfStrings("CXXABIs");
+std::vector CXXABIs = R->getValueAsListOfStrings("CXXABIs");
 for (auto I = CXXABIs.begin(), E = CXXABIs.end(); I != E; ++I) {
-  std::string Part = *I;
-  Test += "Target.getCXXABI().getKind() == TargetCXXABI::" + Part;
+  StringRef Part = *I;
+  Test += "Target.getCXXABI().getKind() == TargetCXXABI::";
+  Test += Part;
   if (I + 1 != E)
 Test += " || ";
   if (FnName)
@@ -2684,7 +2689,7 @@ static void GenerateHasAttrSpellingStrin
 std::string Test;
 if (Attr->isSubClassOf

r305439 - [Basic] Use a static_assert instead of using the old array of size -1 trick.

2017-06-14 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jun 14 20:27:58 2017
New Revision: 305439

URL: http://llvm.org/viewvc/llvm-project?rev=305439&view=rev
Log:
[Basic] Use a static_assert instead of using the old array of size -1 trick.

Modified:
cfe/trunk/include/clang/Basic/AllDiagnostics.h

Modified: cfe/trunk/include/clang/Basic/AllDiagnostics.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AllDiagnostics.h?rev=305439&r1=305438&r2=305439&view=diff
==
--- cfe/trunk/include/clang/Basic/AllDiagnostics.h (original)
+++ cfe/trunk/include/clang/Basic/AllDiagnostics.h Wed Jun 14 20:27:58 2017
@@ -28,7 +28,7 @@
 namespace clang {
 template 
 class StringSizerHelper {
-  char FIELD_TOO_SMALL[SizeOfStr <= FieldType(~0U) ? 1 : -1];
+  static_assert(SizeOfStr <= FieldType(~0U), "Field too small!");
 public:
   enum { Size = SizeOfStr };
 };


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r320915 - [X86] Add builtins and tests for 128 and 256 bit vpopcntdq.

2017-12-15 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Dec 15 22:02:31 2017
New Revision: 320915

URL: http://llvm.org/viewvc/llvm-project?rev=320915&view=rev
Log:
[X86] Add builtins and tests for 128 and 256 bit vpopcntdq.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/CMakeLists.txt
cfe/trunk/lib/Headers/immintrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=320915&r1=320914&r2=320915&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Dec 15 22:02:31 2017
@@ -1060,6 +1060,10 @@ TARGET_BUILTIN(__builtin_ia32_vpconflict
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", 
"avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", 
"avx512cd")
 
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "", 
"avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "", 
"avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "", 
"avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", 
"avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
 TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", 
"avx512vpopcntdq")
 

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=320915&r1=320914&r2=320915&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Dec 15 22:02:31 2017
@@ -7955,6 +7955,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_storesd128_mask: {
 return EmitX86MaskedStore(*this, Ops, 16);
   }
+  case X86::BI__builtin_ia32_vpopcntd_128:
+  case X86::BI__builtin_ia32_vpopcntq_128:
+  case X86::BI__builtin_ia32_vpopcntd_256:
+  case X86::BI__builtin_ia32_vpopcntq_256:
   case X86::BI__builtin_ia32_vpopcntd_512:
   case X86::BI__builtin_ia32_vpopcntq_512: {
 llvm::Type *ResultType = ConvertType(E->getType());

Modified: cfe/trunk/lib/Headers/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/CMakeLists.txt?rev=320915&r1=320914&r2=320915&view=diff
==
--- cfe/trunk/lib/Headers/CMakeLists.txt (original)
+++ cfe/trunk/lib/Headers/CMakeLists.txt Fri Dec 15 22:02:31 2017
@@ -21,6 +21,7 @@ set(files
   avx512vlcdintrin.h
   avx512vldqintrin.h
   avx512vlintrin.h
+  avx512vpopcntdqvlintrin.h
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=320915&r1=320914&r2=320915&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Fri Dec 15 22:02:31 2017
@@ -154,6 +154,11 @@ _mm256_cvtph_ps(__m128i __a)
 #include 
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
+#include <avx512vpopcntdqvlintrin.h>
+#endif
+
 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
 #include <avx512dqintrin.h>
 #endif


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r320916 - [X86] Add the two files I forgot to commit in r320915.

2017-12-15 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Fri Dec 15 22:10:24 2017
New Revision: 320916

URL: http://llvm.org/viewvc/llvm-project?rev=320916&view=rev
Log:
[X86] Add the two files I forgot to commit in r320915.

Added:
cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h
cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c

Added: cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h?rev=320916&view=auto
==
--- cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h (added)
+++ cfe/trunk/lib/Headers/avx512vpopcntdqvlintrin.h Fri Dec 15 22:10:24 2017
@@ -0,0 +1,99 @@
+/*===- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics
+ *--===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===---===
+ */
+#ifndef __IMMINTRIN_H
+#error 
\
+"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> 
instead."
+#endif
+
+#ifndef __AVX512VPOPCNTDQVLINTRIN_H
+#define __AVX512VPOPCNTDQVLINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS 
\
+  __attribute__((__always_inline__, __nodebug__, 
__target__("avx512vpopcntdq,avx512vl")))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+  (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
+  return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) {
+  return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+  (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
+  return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+  (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
+  return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) {
+  return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+  (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
+  return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif

Added: cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vpopcntdqvlintrin.c?rev=320916&view=auto
==

r320919 - [X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR.

2017-12-16 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Sat Dec 16 00:26:22 2017
New Revision: 320919

URL: http://llvm.org/viewvc/llvm-project?rev=320919&view=rev
Log:
[X86] Implement kand/kandn/kor/kxor/kxnor/knot intrinsics using native IR.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=320919&r1=320918&r2=320919&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Dec 16 00:26:22 2017
@@ -7564,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenF
   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
 }
 
+static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps 
Opc,
+  unsigned NumElts, SmallVectorImpl<Value *> &Ops,
+  bool InvertLHS = false) {
+  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
+  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
+
+  if (InvertLHS)
+LHS = CGF.Builder.CreateNot(LHS);
+
+  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
+  CGF.Builder.getIntNTy(std::max(NumElts, 
8U)));
+}
+
 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
 SmallVectorImpl<Value *> &Ops,
 llvm::Type *DstTy,
@@ -8217,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx
 return EmitX86MaskedCompare(*this, CC, false, Ops);
   }
 
+  case X86::BI__builtin_ia32_kandhi:
+return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+  case X86::BI__builtin_ia32_kandnhi:
+return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+  case X86::BI__builtin_ia32_korhi:
+return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+  case X86::BI__builtin_ia32_kxnorhi:
+return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+  case X86::BI__builtin_ia32_kxorhi:
+return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
+  case X86::BI__builtin_ia32_knothi: {
+Ops[0] = getMaskVecValue(*this, Ops[0], 16);
+return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
+ Builder.getInt16Ty());
+  }
+
   case X86::BI__builtin_ia32_vplzcntd_128_mask:
   case X86::BI__builtin_ia32_vplzcntd_256_mask:
   case X86::BI__builtin_ia32_vplzcntd_512_mask:

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=320919&r1=320918&r2=320919&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sat Dec 16 00:26:22 2017
@@ -385,7 +385,9 @@ __m512d test_mm512_set1_pd(double d)
 __mmask16 test_mm512_knot(__mmask16 a)
 {
   // CHECK-LABEL: @test_mm512_knot
-  // CHECK: @llvm.x86.avx512.knot.w
+  // CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1>
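+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>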
+  // CHECK: bitcast <16 x i1> [[NOT]] to i16
   return _mm512_knot(a);
 }
 
@@ -6211,22 +6213,38 @@ __m512i test_mm512_mask_permutexvar_epi3
   return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); 
 }
 
-__mmask16 test_mm512_kand(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kand(__m512i __A, __m512i __B, __m512i __C, __m512i __D, 
__m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kand
-  // CHECK: @llvm.x86.avx512.kand.w
-  return _mm512_kand(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RES:%.*]] = and <16 x i1> [[LHS]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return 
_mm512_mask_cmpneq_epu32_mask(_mm512_kand(_mm512_cmpneq_epu32_mask(__A, __B),
+   
_mm512_cmpneq_epu32_mask(__C, __D)),
+   __E, __F);
 }
 
-__mmask16 test_mm512_kandn(__mmask16 __A, __mmask16 __B) {
+__mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, 
__m512i __E, __m512i __F) {
   // CHECK-LABEL: @test_mm512_kandn
-  // CHECK: @llvm.x86.avx512.kandn.w
-  return _mm512_kandn(__A, __B); 
+  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
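+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>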
+  // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
+  // CHECK: bitcast <16 x i1> [[RES]] to i16
+  return 
_mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B),
+
_mm512_cmpneq_epu32_mask(__C, __D)),
+__E, __F);
 }
 
-__mmask16 test_mm512_kor(

Re: r320971 - [X86] Use {{.*}} instead of hardcoded %1 in knot test.

2017-12-18 Thread Craig Topper via cfe-commits
Thanks!

~Craig

On Mon, Dec 18, 2017 at 3:29 AM, Martin Bohme via cfe-commits <
cfe-commits@lists.llvm.org> wrote:

> Author: mboehme
> Date: Mon Dec 18 03:29:21 2017
> New Revision: 320971
>
> URL: http://llvm.org/viewvc/llvm-project?rev=320971&view=rev
> Log:
> [X86] Use {{.*}} instead of hardcoded %1 in knot test.
>
> This makes the test more resilient and consistent with the other tests
> introduced in r320919.
>
> Modified:
> cfe/trunk/test/CodeGen/avx512f-builtins.c
>
> Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/avx512f-builtins.c?rev=320971&r1=320970&r2=320971&view=diff
> 
> ==
> --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Dec 18 03:29:21 2017
> @@ -385,7 +385,7 @@ __m512d test_mm512_set1_pd(double d)
>  __mmask16 test_mm512_knot(__mmask16 a)
>  {
>// CHECK-LABEL: @test_mm512_knot
> -  // CHECK: [[IN:%.*]] = bitcast i16 %1 to <16 x i1>
> +  // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
>// CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1
> true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1
> true, i1 true, i1 true, i1 true, i1 true, i1 true>
>// CHECK: bitcast <16 x i1> [[NOT]] to i16
>return _mm512_knot(a);
>
>
> ___
> cfe-commits mailing list
> cfe-commits@lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321129 - [X86] Add more CPUID bits to cpuid.h to match gcc and support icelake features.

2017-12-19 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue Dec 19 16:46:09 2017
New Revision: 321129

URL: http://llvm.org/viewvc/llvm-project?rev=321129&view=rev
Log:
[X86] Add more CPUID bits to cpuid.h to match gcc and support icelake features.

Modified:
cfe/trunk/lib/Headers/cpuid.h

Modified: cfe/trunk/lib/Headers/cpuid.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cpuid.h?rev=321129&r1=321128&r2=321129&view=diff
==
--- cfe/trunk/lib/Headers/cpuid.h (original)
+++ cfe/trunk/lib/Headers/cpuid.h Tue Dec 19 16:46:09 2017
@@ -173,16 +173,24 @@
 #define bit_AVX512VL    0x80000000
 
 /* Features in %ecx for leaf 7 sub-leaf 0 */
-#define bit_PREFTCHWT1  0x00000001
-#define bit_AVX512VBMI  0x00000002
-#define bit_PKU         0x00000004
-#define bit_OSPKE       0x00000010
+#define bit_PREFTCHWT1       0x00000001
+#define bit_AVX512VBMI       0x00000002
+#define bit_PKU              0x00000004
+#define bit_OSPKE            0x00000010
+#define bit_AVX512VBMI2      0x00000040
+#define bit_SHSTK            0x00000080
+#define bit_GFNI             0x00000100
+#define bit_VAES             0x00000200
+#define bit_VPCLMULQDQ       0x00000400
+#define bit_AVX512VNNI       0x00000800
+#define bit_AVX512BITALG     0x00001000
 #define bit_AVX512VPOPCNTDQ  0x00004000
-#define bit_RDPID       0x00400000
+#define bit_RDPID            0x00400000
 
 /* Features in %edx for leaf 7 sub-leaf 0 */
 #define bit_AVX5124VNNIW  0x00000004
 #define bit_AVX5124FMAPS  0x00000008
+#define bit_IBT           0x00100000
 
 /* Features in %eax for leaf 13 sub-leaf 1 */
 #define bit_XSAVEOPT    0x00000001
@@ -192,6 +200,7 @@
 /* Features in %ecx for leaf 0x80000001 */
 #define bit_LAHF_LM     0x00000001
 #define bit_ABM         0x00000020
+#define bit_LZCNT       bit_ABM        /* for gcc compat */
 #define bit_SSE4a       0x00000040
 #define bit_PRFCHW      0x00000100
 #define bit_XOP         0x00000800


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321325 - [X86] Allow _mm_prefetch (both the header implementation and the builtin) to accept bit 2 which is supposed to indicate the prefetched addresses will be written to

2017-12-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Dec 21 15:50:22 2017
New Revision: 321325

URL: http://llvm.org/viewvc/llvm-project?rev=321325&view=rev
Log:
[X86] Allow _mm_prefetch (both the header implementation and the builtin) to 
accept bit 2 which is supposed to indicate the prefetched addresses will be 
written to

Add the appropriate _MM_HINT_ET0/ET1 defines to match gcc.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=321325&r1=321324&r2=321325&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Dec 21 15:50:22 2017
@@ -8022,8 +8022,9 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   default: return nullptr;
   case X86::BI_mm_prefetch: {
 Value *Address = Ops[0];
-Value *RW = ConstantInt::get(Int32Ty, 0);
-Value *Locality = Ops[1];
+ConstantInt *C = cast<ConstantInt>(Ops[1]);
+Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
+Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
 Value *Data = ConstantInt::get(Int32Ty, 1);
 Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
 return Builder.CreateCall(F, {Address, RW, Locality, Data});

Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=321325&r1=321324&r2=321325&view=diff
==
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Thu Dec 21 15:50:22 2017
@@ -2035,9 +2035,11 @@ _mm_storer_ps(float *__p, __m128 __a)
   _mm_store_ps(__p, __a);
 }
 
-#define _MM_HINT_T0 3
-#define _MM_HINT_T1 2
-#define _MM_HINT_T2 1
+#define _MM_HINT_ET0 7
+#define _MM_HINT_ET1 6
+#define _MM_HINT_T0  3
+#define _MM_HINT_T1  2
+#define _MM_HINT_T2  1
 #define _MM_HINT_NTA 0
 
 #ifndef _MSC_VER
@@ -2068,7 +2070,8 @@ _mm_storer_ps(float *__p, __m128 __a)
 ///be generated. \n
 ///_MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction 
will
 ///be generated.
-#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
+#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \
+ ((sel) >> 2) & 1, (sel) & 
0x3))
 #endif
 
 /// \brief Stores a 64-bit integer in the specified aligned memory location. To

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=321325&r1=321324&r2=321325&view=diff
==
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Dec 21 15:50:22 2017
@@ -2278,7 +2278,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   default:
 return false;
   case X86::BI_mm_prefetch:
-i = 1; l = 0; u = 3;
+i = 1; l = 0; u = 7;
 break;
   case X86::BI__builtin_ia32_sha1rnds4:
   case X86::BI__builtin_ia32_shuf_f32x4_256_mask:


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321341 - [X86] Add 'prfchw' to the correct CPUs to match the backend.

2017-12-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Dec 21 20:51:00 2017
New Revision: 321341

URL: http://llvm.org/viewvc/llvm-project?rev=321341&view=rev
Log:
[X86] Add 'prfchw' to the correct CPUs to match the backend.

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321341&r1=321340&r2=321341&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu Dec 21 20:51:00 2017
@@ -159,6 +159,7 @@ bool X86TargetInfo::initFeatureMap(
   case CK_Broadwell:
 setFeatureEnabledImpl(Features, "rdseed", true);
 setFeatureEnabledImpl(Features, "adx", true);
+setFeatureEnabledImpl(Features, "prfchw", true);
 LLVM_FALLTHROUGH;
   case CK_Haswell:
 setFeatureEnabledImpl(Features, "avx2", true);
@@ -224,6 +225,7 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "aes", true);
 setFeatureEnabledImpl(Features, "pclmul", true);
 setFeatureEnabledImpl(Features, "sse4.2", true);
+setFeatureEnabledImpl(Features, "prfchw", true);
 LLVM_FALLTHROUGH;
   case CK_Bonnell:
 setFeatureEnabledImpl(Features, "movbe", true);
@@ -241,6 +243,7 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "avx512cd", true);
 setFeatureEnabledImpl(Features, "avx512er", true);
 setFeatureEnabledImpl(Features, "avx512pf", true);
+setFeatureEnabledImpl(Features, "prfchw", true);
 setFeatureEnabledImpl(Features, "prefetchwt1", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 setFeatureEnabledImpl(Features, "rdseed", true);

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321341&r1=321340&r2=321341&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 21 20:51:00 
2017
@@ -589,6 +589,7 @@
 // CHECK_BROADWELL_M32: #define __MMX__ 1
 // CHECK_BROADWELL_M32: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M32: #define __POPCNT__ 1
+// CHECK_BROADWELL_M32: #define __PRFCHW__ 1
 // CHECK_BROADWELL_M32: #define __RDRND__ 1
 // CHECK_BROADWELL_M32: #define __RDSEED__ 1
 // CHECK_BROADWELL_M32: #define __SSE2__ 1
@@ -620,6 +621,7 @@
 // CHECK_BROADWELL_M64: #define __MMX__ 1
 // CHECK_BROADWELL_M64: #define __PCLMUL__ 1
 // CHECK_BROADWELL_M64: #define __POPCNT__ 1
+// CHECK_BROADWELL_M64: #define __PRFCHW__ 1
 // CHECK_BROADWELL_M64: #define __RDRND__ 1
 // CHECK_BROADWELL_M64: #define __RDSEED__ 1
 // CHECK_BROADWELL_M64: #define __SSE2_MATH__ 1
@@ -657,6 +659,7 @@
 // CHECK_SKL_M32: #define __MPX__ 1
 // CHECK_SKL_M32: #define __PCLMUL__ 1
 // CHECK_SKL_M32: #define __POPCNT__ 1
+// CHECK_SKL_M32: #define __PRFCHW__ 1
 // CHECK_SKL_M32: #define __RDRND__ 1
 // CHECK_SKL_M32: #define __RDSEED__ 1
 // CHECK_SKL_M32: #define __RTM__ 1
@@ -690,6 +693,7 @@
 // CHECK_SKL_M64: #define __MPX__ 1
 // CHECK_SKL_M64: #define __PCLMUL__ 1
 // CHECK_SKL_M64: #define __POPCNT__ 1
+// CHECK_SKL_M64: #define __PRFCHW__ 1
 // CHECK_SKL_M64: #define __RDRND__ 1
 // CHECK_SKL_M64: #define __RDSEED__ 1
 // CHECK_SKL_M64: #define __RTM__ 1
@@ -730,6 +734,7 @@
 // CHECK_KNL_M32: #define __PCLMUL__ 1
 // CHECK_KNL_M32: #define __POPCNT__ 1
 // CHECK_KNL_M32: #define __PREFETCHWT1__ 1
+// CHECK_KNL_M32: #define __PRFCHW__ 1
 // CHECK_KNL_M32: #define __RDRND__ 1
 // CHECK_KNL_M32: #define __RTM__ 1
 // CHECK_KNL_M32: #define __SSE2__ 1
@@ -766,6 +771,7 @@
 // CHECK_KNL_M64: #define __PCLMUL__ 1
 // CHECK_KNL_M64: #define __POPCNT__ 1
 // CHECK_KNL_M64: #define __PREFETCHWT1__ 1
+// CHECK_KNL_M64: #define __PRFCHW__ 1
 // CHECK_KNL_M64: #define __RDRND__ 1
 // CHECK_KNL_M64: #define __RTM__ 1
 // CHECK_KNL_M64: #define __SSE2_MATH__ 1
@@ -806,6 +812,7 @@
 // CHECK_KNM_M32: #define __PCLMUL__ 1
 // CHECK_KNM_M32: #define __POPCNT__ 1
 // CHECK_KNM_M32: #define __PREFETCHWT1__ 1
+// CHECK_KNM_M32: #define __PRFCHW__ 1
 // CHECK_KNM_M32: #define __RDRND__ 1
 // CHECK_KNM_M32: #define __RTM__ 1
 // CHECK_KNM_M32: #define __SSE2__ 1
@@ -840,6 +847,7 @@
 // CHECK_KNM_M64: #define __PCLMUL__ 1
 // CHECK_KNM_M64: #define __POPCNT__ 1
 // CHECK_KNM_M64: #define __PREFETCHWT1__ 1
+// CHECK_KNM_M64: #define __PRFCHW__ 1
 // CHECK_KNM_M64: #define __RDRND__ 1
 // CHECK_KNM_M64: #define __RTM__ 1
 // CHECK_KNM_M64: #define __SSE2_MATH__ 1
@@ -879,6 +887,7 @@
 // CHECK_SKX_M32: #define __MPX__ 1
 // CHECK_SKX_M32: #define __PCLMUL__ 1
 // CHECK_SKX_M32: #define __POPCNT__ 1
+// CHECK_SKX_M32: #define __PRFCHW__ 1
 // CHECK_SKX_M32: #define __RDRND__ 1
 // CHECK_SKX_M32: #define __RTM__ 1
 // CHECK_SKX_M32: #define __SGX__ 1
@@ -9

r321343 - [X86] Add missing check lines for the silvermont cases in predefined-arch-macros.c test.

2017-12-21 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Dec 21 21:09:38 2017
New Revision: 321343

URL: http://llvm.org/viewvc/llvm-project?rev=321343&view=rev
Log:
[X86] Add missing check lines for the silvermont cases in 
predefined-arch-macros.c test.

Modified:
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321343&r1=321342&r2=321343&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 21 21:09:38 
2017
@@ -1234,7 +1234,11 @@
 // RUN: %clang -march=slm -m32 -E -dM %s -o - 2>&1 \
 // RUN: -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M32
+// CHECK_SLM_M32: #define __AES__ 1
+// CHECK_SLM_M32: #define __FXSR__ 1
 // CHECK_SLM_M32: #define __MMX__ 1
+// CHECK_SLM_M32: #define __PCLMUL__ 1
+// CHECK_SLM_M32: #define __POPCNT__ 1
 // CHECK_SLM_M32: #define __PRFCHW__ 1
 // CHECK_SLM_M32: #define __SSE2__ 1
 // CHECK_SLM_M32: #define __SSE3__ 1
@@ -1251,7 +1255,11 @@
 // RUN: %clang -march=slm -m64 -E -dM %s -o - 2>&1 \
 // RUN: -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SLM_M64
+// CHECK_SLM_M64: #define __AES__ 1
+// CHECK_SLM_M64: #define __FXSR__ 1
 // CHECK_SLM_M64: #define __MMX__ 1
+// CHECK_SLM_M64: #define __PCLMUL__ 1
+// CHECK_SLM_M64: #define __POPCNT__ 1
 // CHECK_SLM_M64: #define __PRFCHW__ 1
 // CHECK_SLM_M64: #define __SSE2_MATH__ 1
 // CHECK_SLM_M64: #define __SSE2__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D41583: [x86][icelake][vaes]

2017-12-26 Thread Craig Topper via cfe-commits
I meant if the command line says “-mvaes -mno-aes” we should make sure to
disable vaes

On Tue, Dec 26, 2017 at 9:47 AM coby via Phabricator <
revi...@reviews.llvm.org> wrote:

> coby added inline comments.
>
>
> 
> Comment at: lib/Basic/Targets/X86.cpp:573
>  setMMXLevel(Features, AMD3DNowAthlon, Enabled);
>} else if (Name == "aes") {
>  if (Enabled)
> 
> craig.topper wrote:
> > Shouldn't -aes imply -vaes?
> how come? perhaps i'm missing here something?
> why would the first imply the latter?
> following this road an atom z8XXX should be capable of supporting vaes,
> for example (where it lacks avx, for example,
> https://www.intel.com/content/www/us/en/processors/atom/atom-z8000-datasheet-vol-1.html
> )
> also, in that sense, pclmul is implying vpclmulqdq
>
>
> Repository:
>   rC Clang
>
> https://reviews.llvm.org/D41583
>
>
>
> --
~Craig
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321502 - [X86] Enable avx512vpopcntdq and clwb for icelake.

2017-12-27 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Dec 27 14:25:59 2017
New Revision: 321502

URL: http://llvm.org/viewvc/llvm-project?rev=321502&view=rev
Log:
[X86] Enable avx512vpopcntdq and clwb for icelake.

Per table 1-1 of the October 2017 edition of Intel® Architecture Instruction 
Set Extensions and Future Features Programming Reference

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321502&r1=321501&r2=321502&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Dec 27 14:25:59 2017
@@ -132,13 +132,14 @@ bool X86TargetInfo::initFeatureMap(
 break;
 
   case CK_Icelake:
-// TODO: Add icelake features here.
 setFeatureEnabledImpl(Features, "vaes", true);
 setFeatureEnabledImpl(Features, "gfni", true);
 setFeatureEnabledImpl(Features, "vpclmulqdq", true);
 setFeatureEnabledImpl(Features, "avx512bitalg", true);
 setFeatureEnabledImpl(Features, "avx512vnni", true);
 setFeatureEnabledImpl(Features, "avx512vbmi2", true);
+setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
+setFeatureEnabledImpl(Features, "clwb", true);
 LLVM_FALLTHROUGH;
   case CK_Cannonlake:
 setFeatureEnabledImpl(Features, "avx512ifma", true);

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321502&r1=321501&r2=321502&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:25:59 
2017
@@ -1060,10 +1060,12 @@
 // CHECK_ICL_M32: #define __AVX512VBMI__ 1
 // CHECK_ICL_M32: #define __AVX512VL__ 1
 // CHECK_ICL_M32: #define __AVX512VNNI__ 1
+// CHECK_ICL_M32: #define __AVX512VPOPCNTDQ__ 1
 // CHECK_ICL_M32: #define __AVX__ 1
 // CHECK_ICL_M32: #define __BMI2__ 1
 // CHECK_ICL_M32: #define __BMI__ 1
 // CHECK_ICL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ICL_M32: #define __CLWB__ 1
 // CHECK_ICL_M32: #define __F16C__ 1
 // CHECK_ICL_M32: #define __FMA__ 1
 // CHECK_ICL_M32: #define __GFNI__ 1
@@ -1111,10 +1113,12 @@
 // CHECK_ICL_M64: #define __AVX512VBMI__ 1
 // CHECK_ICL_M64: #define __AVX512VL__ 1
 // CHECK_ICL_M64: #define __AVX512VNNI__ 1
+// CHECK_ICL_M64: #define __AVX512VPOPCNTDQ__ 1
 // CHECK_ICL_M64: #define __AVX__ 1
 // CHECK_ICL_M64: #define __BMI2__ 1
 // CHECK_ICL_M64: #define __BMI__ 1
 // CHECK_ICL_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ICL_M64: #define __CLWB__ 1
 // CHECK_ICL_M64: #define __F16C__ 1
 // CHECK_ICL_M64: #define __FMA__ 1
 // CHECK_ICL_M64: #define __GFNI__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321503 - [X86] Test that -march=skx enables PKU.

2017-12-27 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Dec 27 14:26:00 2017
New Revision: 321503

URL: http://llvm.org/viewvc/llvm-project?rev=321503&view=rev
Log:
[X86] Test that -march=skx enables PKU.

Modified:
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321503&r1=321502&r2=321503&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:26:00 
2017
@@ -886,6 +886,7 @@
 // CHECK_SKX_M32: #define __MMX__ 1
 // CHECK_SKX_M32: #define __MPX__ 1
 // CHECK_SKX_M32: #define __PCLMUL__ 1
+// CHECK_SKX_M32: #define __PKU__ 1
 // CHECK_SKX_M32: #define __POPCNT__ 1
 // CHECK_SKX_M32: #define __PRFCHW__ 1
 // CHECK_SKX_M32: #define __RDRND__ 1
@@ -929,6 +930,7 @@
 // CHECK_SKX_M64: #define __MMX__ 1
 // CHECK_SKX_M64: #define __MPX__ 1
 // CHECK_SKX_M64: #define __PCLMUL__ 1
+// CHECK_SKX_M64: #define __PKU__ 1
 // CHECK_SKX_M64: #define __POPCNT__ 1
 // CHECK_SKX_M64: #define __PRFCHW__ 1
 // CHECK_SKX_M64: #define __RDRND__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321504 - [X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake.

2017-12-27 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Dec 27 14:26:01 2017
New Revision: 321504

URL: http://llvm.org/viewvc/llvm-project?rev=321504&view=rev
Log:
[X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on 
cannonlake.

We have cannonlake and icelake inheriting from skylake server in a switch using 
fallthroughs. But they aren't perfect supersets of skylake server.

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321504&r1=321503&r2=321504&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Dec 27 14:26:01 2017
@@ -152,8 +152,10 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "avx512dq", true);
 setFeatureEnabledImpl(Features, "avx512bw", true);
 setFeatureEnabledImpl(Features, "avx512vl", true);
-setFeatureEnabledImpl(Features, "pku", true);
-setFeatureEnabledImpl(Features, "clwb", true);
+if (Kind == CK_SkylakeServer) {
+  setFeatureEnabledImpl(Features, "pku", true);
+  setFeatureEnabledImpl(Features, "clwb", true);
+}
 LLVM_FALLTHROUGH;
   case CK_SkylakeClient:
 setFeatureEnabledImpl(Features, "xsavec", true);

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321504&r1=321503&r2=321504&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Wed Dec 27 14:26:01 
2017
@@ -972,12 +972,14 @@
 // CHECK_CNL_M32: #define __BMI2__ 1
 // CHECK_CNL_M32: #define __BMI__ 1
 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_CNL_M32-NOT: #define __CLWB__ 1
 // CHECK_CNL_M32: #define __F16C__ 1
 // CHECK_CNL_M32: #define __FMA__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
 // CHECK_CNL_M32: #define __MMX__ 1
 // CHECK_CNL_M32: #define __MPX__ 1
 // CHECK_CNL_M32: #define __PCLMUL__ 1
+// CHECK_CNL_M32-NOT: #define __PKU__ 1
 // CHECK_CNL_M32: #define __POPCNT__ 1
 // CHECK_CNL_M32: #define __PRFCHW__ 1
 // CHECK_CNL_M32: #define __RDRND__ 1
@@ -1017,12 +1019,14 @@
 // CHECK_CNL_M64: #define __BMI2__ 1
 // CHECK_CNL_M64: #define __BMI__ 1
 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1
+// CHECK_CNL_M64-NOT: #define __CLWB__ 1
 // CHECK_CNL_M64: #define __F16C__ 1
 // CHECK_CNL_M64: #define __FMA__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1
 // CHECK_CNL_M64: #define __MMX__ 1
 // CHECK_CNL_M64: #define __MPX__ 1
 // CHECK_CNL_M64: #define __PCLMUL__ 1
+// CHECK_CNL_M64-NOT: #define __PKU__ 1
 // CHECK_CNL_M64: #define __POPCNT__ 1
 // CHECK_CNL_M64: #define __PRFCHW__ 1
 // CHECK_CNL_M64: #define __RDRND__ 1
@@ -1075,6 +1079,7 @@
 // CHECK_ICL_M32: #define __MMX__ 1
 // CHECK_ICL_M32: #define __MPX__ 1
 // CHECK_ICL_M32: #define __PCLMUL__ 1
+// CHECK_ICL_M32-NOT: #define __PKU__ 1
 // CHECK_ICL_M32: #define __POPCNT__ 1
 // CHECK_ICL_M32: #define __PRFCHW__ 1
 // CHECK_ICL_M32: #define __RDRND__ 1
@@ -1128,6 +1133,7 @@
 // CHECK_ICL_M64: #define __MMX__ 1
 // CHECK_ICL_M64: #define __MPX__ 1
 // CHECK_ICL_M64: #define __PCLMUL__ 1
+// CHECK_ICL_M64-NOT: #define __PKU__ 1
 // CHECK_ICL_M64: #define __POPCNT__ 1
 // CHECK_ICL_M64: #define __PRFCHW__ 1
 // CHECK_ICL_M64: #define __RDRND__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321547 - Revert r321504 "[X86] Don't accidentally enable PKU on cannon lake and icelake or CLWB on cannonlake."

2017-12-28 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Thu Dec 28 22:39:16 2017
New Revision: 321547

URL: http://llvm.org/viewvc/llvm-project?rev=321547&view=rev
Log:
Revert r321504 "[X86] Don't accidentally enable PKU on cannon lake and icelake 
or CLWB on cannonlake."

I based that commit on what was in Intel's public documentation here 
https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf

Which specifically said CLWB wasn't until Icelake.

But I've since cross checked with SDE and it thinks these features exist on CNL 
and ICL. So now I don't know what to believe.

I've added test coverage of the current behavior as part of the revert so we at 
least now have proof of what we're doing.

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp
cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=321547&r1=321546&r2=321547&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Thu Dec 28 22:39:16 2017
@@ -139,7 +139,6 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "avx512vnni", true);
 setFeatureEnabledImpl(Features, "avx512vbmi2", true);
 setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
-setFeatureEnabledImpl(Features, "clwb", true);
 LLVM_FALLTHROUGH;
   case CK_Cannonlake:
 setFeatureEnabledImpl(Features, "avx512ifma", true);
@@ -152,10 +151,8 @@ bool X86TargetInfo::initFeatureMap(
 setFeatureEnabledImpl(Features, "avx512dq", true);
 setFeatureEnabledImpl(Features, "avx512bw", true);
 setFeatureEnabledImpl(Features, "avx512vl", true);
-if (Kind == CK_SkylakeServer) {
-  setFeatureEnabledImpl(Features, "pku", true);
-  setFeatureEnabledImpl(Features, "clwb", true);
-}
+setFeatureEnabledImpl(Features, "pku", true);
+setFeatureEnabledImpl(Features, "clwb", true);
 LLVM_FALLTHROUGH;
   case CK_SkylakeClient:
 setFeatureEnabledImpl(Features, "xsavec", true);

Modified: cfe/trunk/test/Preprocessor/predefined-arch-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-arch-macros.c?rev=321547&r1=321546&r2=321547&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-arch-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c Thu Dec 28 22:39:16 
2017
@@ -972,14 +972,14 @@
 // CHECK_CNL_M32: #define __BMI2__ 1
 // CHECK_CNL_M32: #define __BMI__ 1
 // CHECK_CNL_M32: #define __CLFLUSHOPT__ 1
-// CHECK_CNL_M32-NOT: #define __CLWB__ 1
+// CHECK_CNL_M32: #define __CLWB__ 1
 // CHECK_CNL_M32: #define __F16C__ 1
 // CHECK_CNL_M32: #define __FMA__ 1
 // CHECK_CNL_M32: #define __LZCNT__ 1
 // CHECK_CNL_M32: #define __MMX__ 1
 // CHECK_CNL_M32: #define __MPX__ 1
 // CHECK_CNL_M32: #define __PCLMUL__ 1
-// CHECK_CNL_M32-NOT: #define __PKU__ 1
+// CHECK_CNL_M32: #define __PKU__ 1
 // CHECK_CNL_M32: #define __POPCNT__ 1
 // CHECK_CNL_M32: #define __PRFCHW__ 1
 // CHECK_CNL_M32: #define __RDRND__ 1
@@ -1019,14 +1019,14 @@
 // CHECK_CNL_M64: #define __BMI2__ 1
 // CHECK_CNL_M64: #define __BMI__ 1
 // CHECK_CNL_M64: #define __CLFLUSHOPT__ 1
-// CHECK_CNL_M64-NOT: #define __CLWB__ 1
+// CHECK_CNL_M64: #define __CLWB__ 1
 // CHECK_CNL_M64: #define __F16C__ 1
 // CHECK_CNL_M64: #define __FMA__ 1
 // CHECK_CNL_M64: #define __LZCNT__ 1
 // CHECK_CNL_M64: #define __MMX__ 1
 // CHECK_CNL_M64: #define __MPX__ 1
 // CHECK_CNL_M64: #define __PCLMUL__ 1
-// CHECK_CNL_M64-NOT: #define __PKU__ 1
+// CHECK_CNL_M64: #define __PKU__ 1
 // CHECK_CNL_M64: #define __POPCNT__ 1
 // CHECK_CNL_M64: #define __PRFCHW__ 1
 // CHECK_CNL_M64: #define __RDRND__ 1
@@ -1079,7 +1079,7 @@
 // CHECK_ICL_M32: #define __MMX__ 1
 // CHECK_ICL_M32: #define __MPX__ 1
 // CHECK_ICL_M32: #define __PCLMUL__ 1
-// CHECK_ICL_M32-NOT: #define __PKU__ 1
+// CHECK_ICL_M32: #define __PKU__ 1
 // CHECK_ICL_M32: #define __POPCNT__ 1
 // CHECK_ICL_M32: #define __PRFCHW__ 1
 // CHECK_ICL_M32: #define __RDRND__ 1
@@ -1133,7 +1133,7 @@
 // CHECK_ICL_M64: #define __MMX__ 1
 // CHECK_ICL_M64: #define __MPX__ 1
 // CHECK_ICL_M64: #define __PCLMUL__ 1
-// CHECK_ICL_M64-NOT: #define __PKU__ 1
+// CHECK_ICL_M64: #define __PKU__ 1
 // CHECK_ICL_M64: #define __POPCNT__ 1
 // CHECK_ICL_M64: #define __PRFCHW__ 1
 // CHECK_ICL_M64: #define __RDRND__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r321749 - [Docs] Re-generate command line documentation, primarily to get the icelake feature command line options in, but there were a couple other changes too.

2018-01-03 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jan  3 10:29:12 2018
New Revision: 321749

URL: http://llvm.org/viewvc/llvm-project?rev=321749&view=rev
Log:
[Docs] Re-generate command line documentation, primarily to get the icelake 
feature command line options in, but there were a couple other changes too.

Modified:
cfe/trunk/docs/ClangCommandLineReference.rst

Modified: cfe/trunk/docs/ClangCommandLineReference.rst
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/docs/ClangCommandLineReference.rst?rev=321749&r1=321748&r2=321749&view=diff
==
--- cfe/trunk/docs/ClangCommandLineReference.rst (original)
+++ cfe/trunk/docs/ClangCommandLineReference.rst Wed Jan  3 10:29:12 2018
@@ -120,6 +120,10 @@ Output path for the plist report
 
 .. option:: -compatibility\_version
 
+.. option:: --config 
+
+Specifies configuration file
+
 .. option:: --constant-cfstrings
 
 .. option:: -coverage, --coverage
@@ -1545,6 +1549,10 @@ Enable ARC-style weak references in Obje
 
 OpenMP target code is compiled as relocatable using the -c flag. For OpenMP 
targets the code is relocatable by default.
 
+.. option:: -fopenmp-simd, -fno-openmp-simd
+
+Emit OpenMP code only for SIMD-based constructs.
+
 .. option:: -fopenmp-use-tls
 
 .. option:: -fopenmp-version=
@@ -1998,7 +2006,7 @@ Link stack frames through backchain on S
 
 .. option:: -mconsole
 
-.. option:: -mcpu=, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 
(equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 
(equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62)
+.. option:: -mcpu=, -mv4 (equivalent to -mcpu=hexagonv4), -mv5 
(equivalent to -mcpu=hexagonv5), -mv55 (equivalent to -mcpu=hexagonv55), -mv60 
(equivalent to -mcpu=hexagonv60), -mv62 (equivalent to -mcpu=hexagonv62), -mv65 
(equivalent to -mcpu=hexagonv65)
 
 .. option:: -mdefault-build-attributes, -mno-default-build-attributes
 
@@ -2328,6 +2336,8 @@ X86
 
 .. option:: -mavx2, -mno-avx2
 
+.. option:: -mavx512bitalg, -mno-avx512bitalg
+
 .. option:: -mavx512bw, -mno-avx512bw
 
 .. option:: -mavx512cd, -mno-avx512cd
@@ -2344,8 +2354,12 @@ X86
 
 .. option:: -mavx512vbmi, -mno-avx512vbmi
 
+.. option:: -mavx512vbmi2, -mno-avx512vbmi2
+
 .. option:: -mavx512vl, -mno-avx512vl
 
+.. option:: -mavx512vnni, -mno-avx512vnni
+
 .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq
 
 .. option:: -mbmi, -mno-bmi
@@ -2370,6 +2384,8 @@ X86
 
 .. option:: -mfxsr, -mno-fxsr
 
+.. option:: -mgfni, -mno-gfni
+
 .. option:: -mibt, -mno-ibt
 
 .. option:: -mlwp, -mno-lwp
@@ -2424,6 +2440,10 @@ X86
 
 .. option:: -mtbm, -mno-tbm
 
+.. option:: -mvaes, -mno-vaes
+
+.. option:: -mvpclmulqdq, -mno-vpclmulqdq
+
 .. option:: -mx87, -m80387, -mno-x87
 
 .. option:: -mxop, -mno-xop


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r322038 - [X86] Replace cvt*2mask intrinsics with native IR using 'icmp slt X, zeroinitializer'.

2018-01-08 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Mon Jan  8 14:37:56 2018
New Revision: 322038

URL: http://llvm.org/viewvc/llvm-project?rev=322038&view=rev
Log:
[X86] Replace cvt*2mask intrinsics with native IR using 'icmp slt X, 
zeroinitializer'.

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/CodeGen/avx512dq-builtins.c
cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
cfe/trunk/test/CodeGen/avx512vldq-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=322038&r1=322037&r2=322038&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jan  8 14:37:56 2018
@@ -7791,7 +7791,9 @@ static Value *EmitX86Select(CodeGenFunct
 }
 
 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
-   bool Signed, SmallVectorImpl &Ops) 
{
+   bool Signed, ArrayRef Ops) {
+  assert((Ops.size() == 2 || Ops.size() == 4) &&
+ "Unexpected number of arguments");
   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
   Value *Cmp;
 
@@ -7815,9 +7817,11 @@ static Value *EmitX86MaskedCompare(CodeG
 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
   }
 
-  const auto *C = dyn_cast(Ops.back());
-  if (!C || !C->isAllOnesValue())
-Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), 
NumElts));
+  if (Ops.size() == 4) {
+const auto *C = dyn_cast(Ops[3]);
+if (!C || !C->isAllOnesValue())
+  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts));
+  }
 
   if (NumElts < 8) {
 uint32_t Indices[8];
@@ -7833,6 +7837,11 @@ static Value *EmitX86MaskedCompare(CodeG
 std::max(NumElts, 8U)));
 }
 
+static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
+  Value *Zero = Constant::getNullValue(In->getType());
+  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
+}
+
 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef Ops) {
 
   llvm::Type *Ty = Ops[0]->getType();
@@ -8179,6 +8188,20 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_cvtmask2q512:
 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
 
+  case X86::BI__builtin_ia32_cvtb2mask128:
+  case X86::BI__builtin_ia32_cvtb2mask256:
+  case X86::BI__builtin_ia32_cvtb2mask512:
+  case X86::BI__builtin_ia32_cvtw2mask128:
+  case X86::BI__builtin_ia32_cvtw2mask256:
+  case X86::BI__builtin_ia32_cvtw2mask512:
+  case X86::BI__builtin_ia32_cvtd2mask128:
+  case X86::BI__builtin_ia32_cvtd2mask256:
+  case X86::BI__builtin_ia32_cvtd2mask512:
+  case X86::BI__builtin_ia32_cvtq2mask128:
+  case X86::BI__builtin_ia32_cvtq2mask256:
+  case X86::BI__builtin_ia32_cvtq2mask512:
+return EmitX86ConvertToMask(*this, Ops[0]);
+
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
   case X86::BI__builtin_ia32_storeaps128_mask:

Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512bw-builtins.c?rev=322038&r1=322037&r2=322038&view=diff
==
--- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Mon Jan  8 14:37:56 2018
@@ -1743,7 +1743,8 @@ __mmask32 test_mm512_mask_testn_epi16_ma
 
 __mmask64 test_mm512_movepi8_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi8_mask
-  // CHECK: @llvm.x86.avx512.cvtb2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <64 x i1> [[CMP]] to i64
   return _mm512_movepi8_mask(__A); 
 }
 
@@ -1941,7 +1942,8 @@ __m512i test_mm512_sad_epu8(__m512i __A,
 
 __mmask32 test_mm512_movepi16_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi16_mask
-  // CHECK: @llvm.x86.avx512.cvtw2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <32 x i1> [[CMP]] to i32
   return _mm512_movepi16_mask(__A); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=322038&r1=322037&r2=322038&view=diff
==
--- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Mon Jan  8 14:37:56 2018
@@ -923,7 +923,8 @@ __m128d test_mm_maskz_reduce_round_sd(__
 
 __mmask16 test_mm512_movepi32_mask(__m512i __A) {
   // CHECK-LABEL: @test_mm512_movepi32_mask
-  // CHECK: @llvm.x86.avx512.cvtd2mask.512
+  // CHECK: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  // CHECK: bitcast <16 x i1> [[CMP]] to i16
   return

r322244 - [X86][Sema] Remove constant range checks on builtins that take a char.

2018-01-10 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jan 10 17:37:57 2018
New Revision: 322244

URL: http://llvm.org/viewvc/llvm-project?rev=322244&view=rev
Log:
[X86][Sema] Remove constant range checks on builtins that take a char.

The constant is already reduced to 8-bits by the time we get here and the 
checks were just ensuring that it was 8 bits. Thus I don't think there's any way 
for them to fail.

Modified:
cfe/trunk/lib/Sema/SemaChecking.cpp

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=322244&r1=322243&r2=322244&view=diff
==
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jan 10 17:37:57 2018
@@ -2361,13 +2361,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   case X86::BI__builtin_ia32_cmpss_mask:
 i = 2; l = 0; u = 31;
 break;
-  case X86::BI__builtin_ia32_xabort:
-i = 0; l = -128; u = 255;
-break;
-  case X86::BI__builtin_ia32_pshufw:
-  case X86::BI__builtin_ia32_aeskeygenassist128:
-i = 1; l = -128; u = 255;
-break;
   case X86::BI__builtin_ia32_vcvtps2ph:
   case X86::BI__builtin_ia32_vcvtps2ph_mask:
   case X86::BI__builtin_ia32_vcvtps2ph256:
@@ -2405,27 +2398,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   case X86::BI__builtin_ia32_fpcla_mask:
 i = 1; l = 0; u = 255;
 break;
-  case X86::BI__builtin_ia32_palignr:
-  case X86::BI__builtin_ia32_insertps128:
-  case X86::BI__builtin_ia32_dpps:
-  case X86::BI__builtin_ia32_dppd:
-  case X86::BI__builtin_ia32_dpps256:
-  case X86::BI__builtin_ia32_mpsadbw128:
-  case X86::BI__builtin_ia32_mpsadbw256:
-  case X86::BI__builtin_ia32_pcmpistrm128:
-  case X86::BI__builtin_ia32_pcmpistri128:
-  case X86::BI__builtin_ia32_pcmpistria128:
-  case X86::BI__builtin_ia32_pcmpistric128:
-  case X86::BI__builtin_ia32_pcmpistrio128:
-  case X86::BI__builtin_ia32_pcmpistris128:
-  case X86::BI__builtin_ia32_pcmpistriz128:
-  case X86::BI__builtin_ia32_pclmulqdq128:
-  case X86::BI__builtin_ia32_vperm2f128_pd256:
-  case X86::BI__builtin_ia32_vperm2f128_ps256:
-  case X86::BI__builtin_ia32_vperm2f128_si256:
-  case X86::BI__builtin_ia32_permti256:
-i = 2; l = -128; u = 255;
-break;
   case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256:
   case X86::BI__builtin_ia32_palignr512_mask:
@@ -2480,15 +2452,6 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   case X86::BI__builtin_ia32_scatterpfqps:
 i = 4; l = 2; u = 3;
 break;
-  case X86::BI__builtin_ia32_pcmpestrm128:
-  case X86::BI__builtin_ia32_pcmpestri128:
-  case X86::BI__builtin_ia32_pcmpestria128:
-  case X86::BI__builtin_ia32_pcmpestric128:
-  case X86::BI__builtin_ia32_pcmpestrio128:
-  case X86::BI__builtin_ia32_pcmpestris128:
-  case X86::BI__builtin_ia32_pcmpestriz128:
-i = 4; l = -128; u = 255;
-break;
   case X86::BI__builtin_ia32_rndscalesd_round_mask:
   case X86::BI__builtin_ia32_rndscaless_round_mask:
 i = 4; l = 0; u = 255;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r322245 - [X86] Make -mavx512f imply -mfma and -mf16c in the frontend like it does in the backend.

2018-01-10 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jan 10 17:37:59 2018
New Revision: 322245

URL: http://llvm.org/viewvc/llvm-project?rev=322245&view=rev
Log:
[X86] Make -mavx512f imply -mfma and -mf16c in the frontend like it does in the 
backend.

Similarly, make -mno-fma and -mno-f16c imply -mno-avx512f.

Without this, "-mno-sse -mavx512f" ends up with avx512f being enabled in the 
frontend but disabled in the backend.

Modified:
cfe/trunk/lib/Basic/Targets/X86.cpp

Modified: cfe/trunk/lib/Basic/Targets/X86.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.cpp?rev=322245&r1=322244&r2=322245&view=diff
==
--- cfe/trunk/lib/Basic/Targets/X86.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/X86.cpp Wed Jan 10 17:37:59 2018
@@ -430,7 +430,7 @@ void X86TargetInfo::setSSELevel(llvm::St
   if (Enabled) {
 switch (Level) {
 case AVX512F:
-  Features["avx512f"] = true;
+  Features["avx512f"] = Features["fma"] = Features["f16c"] = true;
   LLVM_FALLTHROUGH;
 case AVX2:
   Features["avx2"] = true;
@@ -644,6 +644,8 @@ void X86TargetInfo::setFeatureEnabledImp
   } else if (Name == "fma") {
 if (Enabled)
   setSSELevel(Features, AVX, Enabled);
+else
+  setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "fma4") {
 setXOPLevel(Features, FMA4, Enabled);
   } else if (Name == "xop") {
@@ -653,6 +655,8 @@ void X86TargetInfo::setFeatureEnabledImp
   } else if (Name == "f16c") {
 if (Enabled)
   setSSELevel(Features, AVX, Enabled);
+else
+  setSSELevel(Features, AVX512F, Enabled);
   } else if (Name == "sha") {
 if (Enabled)
   setSSELevel(Features, SSE2, Enabled);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r322247 - [X86][Sema] Range check the constant argument for the vpshld/vpshrd builtins to ensure it fits in 8-bits.

2018-01-10 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jan 10 17:38:02 2018
New Revision: 322247

URL: http://llvm.org/viewvc/llvm-project?rev=322247&view=rev
Log:
[X86][Sema] Range check the constant argument for the vpshld/vpshrd builtins to 
ensure it fits in 8-bits.

Modified:
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/Sema/builtins-x86.c

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=322247&r1=322246&r2=322247&view=diff
==
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jan 10 17:38:02 2018
@@ -2410,6 +2410,24 @@ bool Sema::CheckX86BuiltinFunctionCall(u
   case X86::BI__builtin_ia32_dbpsadbw128_mask:
   case X86::BI__builtin_ia32_dbpsadbw256_mask:
   case X86::BI__builtin_ia32_dbpsadbw512_mask:
+  case X86::BI__builtin_ia32_vpshldd128_mask:
+  case X86::BI__builtin_ia32_vpshldd256_mask:
+  case X86::BI__builtin_ia32_vpshldd512_mask:
+  case X86::BI__builtin_ia32_vpshldq128_mask:
+  case X86::BI__builtin_ia32_vpshldq256_mask:
+  case X86::BI__builtin_ia32_vpshldq512_mask:
+  case X86::BI__builtin_ia32_vpshldw128_mask:
+  case X86::BI__builtin_ia32_vpshldw256_mask:
+  case X86::BI__builtin_ia32_vpshldw512_mask:
+  case X86::BI__builtin_ia32_vpshrdd128_mask:
+  case X86::BI__builtin_ia32_vpshrdd256_mask:
+  case X86::BI__builtin_ia32_vpshrdd512_mask:
+  case X86::BI__builtin_ia32_vpshrdq128_mask:
+  case X86::BI__builtin_ia32_vpshrdq256_mask:
+  case X86::BI__builtin_ia32_vpshrdq512_mask:
+  case X86::BI__builtin_ia32_vpshrdw128_mask:
+  case X86::BI__builtin_ia32_vpshrdw256_mask:
+  case X86::BI__builtin_ia32_vpshrdw512_mask:
 i = 2; l = 0; u = 255;
 break;
   case X86::BI__builtin_ia32_fixupimmpd512_mask:

Modified: cfe/trunk/test/Sema/builtins-x86.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/builtins-x86.c?rev=322247&r1=322246&r2=322247&view=diff
==
--- cfe/trunk/test/Sema/builtins-x86.c (original)
+++ cfe/trunk/test/Sema/builtins-x86.c Wed Jan 10 17:38:02 2018
@@ -4,12 +4,17 @@ typedef long long __m128i __attribute__(
 typedef float __m128 __attribute__((__vector_size__(16)));
 typedef double __m128d __attribute__((__vector_size__(16)));
 
+typedef long long __m256i __attribute__((__vector_size__(32)));
+typedef float __m256 __attribute__((__vector_size__(32)));
+typedef double __m256d __attribute__((__vector_size__(32)));
+
 typedef long long __m512i __attribute__((__vector_size__(64)));
 typedef float __m512 __attribute__((__vector_size__(64)));
 typedef double __m512d __attribute__((__vector_size__(64)));
 
 typedef unsigned char __mmask8;
 typedef unsigned short __mmask16;
+typedef unsigned int __mmask32;
 
 __m128 test__builtin_ia32_cmpps(__m128 __a, __m128 __b) {
   __builtin_ia32_cmpps(__a, __b, 32); // expected-error {{argument should be a 
value from 0 to 31}}
@@ -83,3 +88,74 @@ __m512 _mm512_mask_prefetch_i32gather_ps
   return __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // 
expected-error {{argument should be a value from 2 to 3}}
 }
 
+__m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshldq512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshldd512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshldw512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshrdq512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshrdd512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, 
__m512i __B) {
+  return __builtin_ia32_vpshrdw512_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, 
__m256i __B) {
+  return __builtin_ia32_vpshldq256_mask(__A, __B, 1024, __S, __U); // 
expected-error {{argument should be a value from 0 to 255}}
+}
+
+__m128i test_mm128_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, 
__m128i __B) {
+  return __builtin_ia32_vpshldq128_mask(__A, __B, 1024, __S, __U); // 
expected-er

r322246 - [X86] Fix vpshrd builtins to require an ICE for their constant argument to match vpshld.

2018-01-10 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed Jan 10 17:38:00 2018
New Revision: 322246

URL: http://llvm.org/viewvc/llvm-project?rev=322246&view=rev
Log:
[X86] Fix vpshrd builtins to require an ICE for their constant argument to 
match vpshld.

Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=322246&r1=322245&r2=322246&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jan 10 17:38:00 2018
@@ -1357,15 +1357,15 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw12
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", 
"avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", 
"avx512vbmi2")
 
-TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", 
"avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", 
"avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", 
"avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", 
"avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "", 
"avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", 
"avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "", 
"avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "", 
"avx512vbmi2")
 
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", 
"avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", 
"avx512bw")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r326807 - [X86] Fix typo in cpuid.h, bit_AVX51SER->bit_AVX512ER.

2018-03-06 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Tue Mar  6 08:06:44 2018
New Revision: 326807

URL: http://llvm.org/viewvc/llvm-project?rev=326807&view=rev
Log:
[X86] Fix typo in cpuid.h, bit_AVX51SER->bit_AVX512ER.

Modified:
cfe/trunk/lib/Headers/cpuid.h

Modified: cfe/trunk/lib/Headers/cpuid.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/cpuid.h?rev=326807&r1=326806&r2=326807&view=diff
==
--- cfe/trunk/lib/Headers/cpuid.h (original)
+++ cfe/trunk/lib/Headers/cpuid.h Tue Mar  6 08:06:44 2018
@@ -166,7 +166,7 @@
 #define bit_CLFLUSHOPT  0x0080
 #define bit_CLWB0x0100
 #define bit_AVX512PF0x0400
-#define bit_AVX51SER0x0800
+#define bit_AVX512ER0x0800
 #define bit_AVX512CD0x1000
 #define bit_SHA 0x2000
 #define bit_AVX512BW0x4000


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333563 - [X86] Remove 'return' from a bunch of intrinsics that return void and use a builtin that returns void.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 10:23:45 2018
New Revision: 333563

URL: http://llvm.org/viewvc/llvm-project?rev=333563&view=rev
Log:
[X86] Remove 'return' from a bunch of intrinsics that return void and use a 
builtin that returns void.

Found by running the intrinsic headers through -pedantic -ansi.

Modified:
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/fxsrintrin.h
cfe/trunk/lib/Headers/ia32intrin.h
cfe/trunk/lib/Headers/immintrin.h
cfe/trunk/lib/Headers/pkuintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/lib/Headers/xsaveintrin.h
cfe/trunk/lib/Headers/xsaveoptintrin.h

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=333563&r1=333562&r2=333563&view=diff
==
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Wed May 30 10:23:45 2018
@@ -7432,7 +7432,7 @@ _mm256_maskz_cvtusepi64_epi16 (__mmask8
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
-  return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, 
__M);
+  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=333563&r1=333562&r2=333563&view=diff
==
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Wed May 30 10:23:45 2018
@@ -1979,7 +1979,7 @@ _mm_store1_pd(double *__dp, __m128d __a)
 static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store_pd1(double *__dp, __m128d __a)
 {
-  return _mm_store1_pd(__dp, __a);
+  _mm_store1_pd(__dp, __a);
 }
 
 /// Stores a 128-bit vector of [2 x double] into an unaligned memory

Modified: cfe/trunk/lib/Headers/fxsrintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fxsrintrin.h?rev=333563&r1=333562&r2=333563&view=diff
==
--- cfe/trunk/lib/Headers/fxsrintrin.h (original)
+++ cfe/trunk/lib/Headers/fxsrintrin.h Wed May 30 10:23:45 2018
@@ -43,7 +43,7 @@
 static __inline__ void __DEFAULT_FN_ATTRS
 _fxsave(void *__p)
 {
-  return __builtin_ia32_fxsave(__p);
+  __builtin_ia32_fxsave(__p);
 }
 
 /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
@@ -61,7 +61,7 @@ _fxsave(void *__p)
 static __inline__ void __DEFAULT_FN_ATTRS
 _fxrstor(void *__p)
 {
-  return __builtin_ia32_fxrstor(__p);
+  __builtin_ia32_fxrstor(__p);
 }
 
 #ifdef __x86_64__
@@ -78,7 +78,7 @@ _fxrstor(void *__p)
 static __inline__ void __DEFAULT_FN_ATTRS
 _fxsave64(void *__p)
 {
-  return __builtin_ia32_fxsave64(__p);
+  __builtin_ia32_fxsave64(__p);
 }
 
 /// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte
@@ -96,7 +96,7 @@ _fxsave64(void *__p)
 static __inline__ void __DEFAULT_FN_ATTRS
 _fxrstor64(void *__p)
 {
-  return __builtin_ia32_fxrstor64(__p);
+  __builtin_ia32_fxrstor64(__p);
 }
 #endif
 

Modified: cfe/trunk/lib/Headers/ia32intrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/ia32intrin.h?rev=333563&r1=333562&r2=333563&view=diff
==
--- cfe/trunk/lib/Headers/ia32intrin.h (original)
+++ cfe/trunk/lib/Headers/ia32intrin.h Wed May 30 10:23:45 2018
@@ -72,7 +72,7 @@ __rdtscp(unsigned int *__A) {
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _wbinvd(void) {
-  return __builtin_ia32_wbinvd();
+  __builtin_ia32_wbinvd();
 }
 
 #endif /* __IA32INTRIN_H */

Modified: cfe/trunk/lib/Headers/immintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/immintrin.h?rev=333563&r1=333562&r2=333563&view=diff
==
--- cfe/trunk/lib/Headers/immintrin.h (original)
+++ cfe/trunk/lib/Headers/immintrin.h Wed May 30 10:23:45 2018
@@ -282,25 +282,25 @@ _readgsbase_u64(void)
 static __inline__ void __attribute__((__always_inline__, __nodebug__, 
__target__("fsgsbase")))
 _writefsbase_u32(unsigned int __V)
 {
-  return __builtin_ia32_wrfsbase32(__V);
+  __builtin_ia32_wrfsbase32(__V);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, 
__target__("fsgsbase")))
 _writefsbase_u64(unsigned long long __V)
 {
-  return __builtin_ia32_wrfsbase64(__V);
+  __builtin_ia32_wrfsbase64(__V);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, 
__target__("fsgsbase")))
 _writegsbase_u32(unsigned int __V)
 {
-  return __builtin_ia32_wrgsbase32(__V);
+  __builtin_ia32_wrgsbase32(__V);
 }
 
 static __inline__ void _

r333568 - [X86] Reduce the number of setzero intrinsics to just the set defined by the Intel Intrinsics Guide.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 11:02:11 2018
New Revision: 333568

URL: http://llvm.org/viewvc/llvm-project?rev=333568&view=rev
Log:
[X86] Reduce the number of setzero intrinsics to just the set defined by the 
Intel Intrinsics Guide.

We had quite a few for different element sizes of integers sometimes with 
strange target features attached to them.

We only need a single version for each of _m128i, _m256i, and _m512i with the 
target feature that first introduced those types.

Modified:
cfe/trunk/lib/Headers/avx512bitalgintrin.h
cfe/trunk/lib/Headers/avx512bwintrin.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vbmi2intrin.h
cfe/trunk/lib/Headers/avx512vlbwintrin.h
cfe/trunk/lib/Headers/avx512vlcdintrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Headers/gfniintrin.h
cfe/trunk/lib/Headers/mmintrin.h
cfe/trunk/test/CodeGen/avx512bw-builtins.c
cfe/trunk/test/Headers/x86intrin-2.c

Modified: cfe/trunk/lib/Headers/avx512bitalgintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bitalgintrin.h?rev=333568&r1=333567&r2=333568&view=diff
==
--- cfe/trunk/lib/Headers/avx512bitalgintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bitalgintrin.h Wed May 30 11:02:11 2018
@@ -48,7 +48,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
 {
-  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
+  return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(),
   __U,
   __B);
 }
@@ -70,7 +70,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __m
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
 {
-  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
+  return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_si512(),
   __U,
   __B);
 }

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=333568&r1=333567&r2=333568&view=diff
==
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Wed May 30 11:02:11 2018
@@ -34,26 +34,6 @@ typedef unsigned long long __mmask64;
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, 
__target__("avx512bw")))
 
-static  __inline __m512i __DEFAULT_FN_ATTRS
-_mm512_setzero_qi(void) {
-  return (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
-static  __inline __m512i __DEFAULT_FN_ATTRS
-_mm512_setzero_hi(void) {
-  return (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 };
-}
-
 /* Integer compare */
 
 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
@@ -212,7 +192,7 @@ static __inline__ __m512i __DEFAULT_FN_A
 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_add_epi8(__A, 
__B),
- (__v64qi)_mm512_setzero_qi());
+ (__v64qi)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -231,7 +211,7 @@ static __inline__ __m512i __DEFAULT_FN_A
 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
  (__v64qi)_mm512_sub_epi8(__A, 
__B),
- (__v64qi)_mm512_setzero_qi());
+ (__v64qi)_mm512_setzero_si512());
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -250,7 +230,7 @@ static __inline__ __m512i __DEFAULT_FN_A
 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
  (__v32hi)_mm512_add_epi16(__A, 
__B),
- (__v32hi)_mm512_setzero_hi());
+ (__v32hi)_mm512_setzer

r333572 - [X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 11:27:07 2018
New Revision: 333572

URL: http://llvm.org/viewvc/llvm-project?rev=333572&view=rev
Log:
[X86] Simplify the implementation of _mm_sqrt_ss, _mm_rcp_ss, and _mm_rsqrt_ss.

We don't need the insertion back into the original vector at the end. The 
builtin already understands that.

This is different than _mm_sqrt_sd which takes two arguments and we do need to 
insert.

Modified:
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=333572&r1=333571&r2=333572&view=diff
==
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Wed May 30 11:27:07 2018
@@ -224,8 +224,7 @@ _mm_div_ps(__m128 __a, __m128 __b)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_sqrt_ss(__m128 __a)
 {
-  __m128 __c = __builtin_ia32_sqrtss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
 }
 
 /// Calculates the square roots of the values stored in a 128-bit vector
@@ -260,8 +259,7 @@ _mm_sqrt_ps(__m128 __a)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rcp_ss(__m128 __a)
 {
-  __m128 __c = __builtin_ia32_rcpss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
 }
 
 /// Calculates the approximate reciprocals of the values stored in a
@@ -278,7 +276,7 @@ _mm_rcp_ss(__m128 __a)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rcp_ps(__m128 __a)
 {
-  return __builtin_ia32_rcpps((__v4sf)__a);
+  return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
 }
 
 /// Calculates the approximate reciprocal of the square root of the value
@@ -297,8 +295,7 @@ _mm_rcp_ps(__m128 __a)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_rsqrt_ss(__m128 __a)
 {
-  __m128 __c = __builtin_ia32_rsqrtss((__v4sf)__a);
-  return (__m128) { __c[0], __a[1], __a[2], __a[3] };
+  return __builtin_ia32_rsqrtss((__v4sf)__a);
 }
 
 /// Calculates the approximate reciprocals of the square roots of the

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=333572&r1=333571&r2=333572&view=diff
==
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Wed May 30 11:27:07 2018
@@ -508,14 +508,6 @@ __m128 test_mm_rcp_ps(__m128 x) {
 __m128 test_mm_rcp_ss(__m128 x) {
   // CHECK-LABEL: test_mm_rcp_ss
   // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_rcp_ss(x);
 }
 
@@ -528,14 +520,6 @@ __m128 test_mm_rsqrt_ps(__m128 x) {
 __m128 test_mm_rsqrt_ss(__m128 x) {
   // CHECK-LABEL: test_mm_rsqrt_ss
   // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_rsqrt_ss(x);
 }
 
@@ -662,14 +646,6 @@ __m128 test_mm_sqrt_ps(__m128 x) {
 __m128 test_sqrt_ss(__m128 x) {
   // CHECK: define {{.*}} @test_sqrt_ss
   // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
-  // CHECK: extractelement <4 x float> {{.*}}, i32 0
-  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
-  // CHECK: extractelement <4 x float> {{.*}}, i32 1
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
-  // CHECK: extractelement <4 x float> {{.*}}, i32 2
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
-  // CHECK: extractelement <4 x float> {{.*}}, i32 3
-  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_sqrt_ss(x);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333593 - [X86] Add __extension__ to a bunch of places in our intrinsic headers that fail if you run it through -pedantic -ansi.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 14:08:27 2018
New Revision: 333593

URL: http://llvm.org/viewvc/llvm-project?rev=333593&view=rev
Log:
[X86] Add __extension__ to a bunch of places in our intrinsic headers that fail 
if you run it through -pedantic -ansi.

All of these are lines that create a 'compound literal' to concatenate elements 
together.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333593&r1=333592&r2=333593&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 14:08:27 2018
@@ -180,7 +180,7 @@ typedef enum
 static  __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_setzero_si512(void)
 {
-  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
 }
 
 #define _mm512_setzero_epi32 _mm512_setzero_si512
@@ -262,8 +262,8 @@ _mm512_maskz_broadcastq_epi64 (__mmask8
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_setzero_ps(void)
 {
-  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
 #define _mm512_setzero _mm512_setzero_ps
@@ -271,49 +271,52 @@ _mm512_setzero_ps(void)
 static  __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_setzero_pd(void)
 {
-  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_set1_ps(float __w)
 {
-  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
-   __w, __w, __w, __w, __w, __w, __w, __w  };
+  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
+ __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_set1_pd(double __w)
 {
-  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
+  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi8(char __w)
 {
-  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w  };
+  return __extension__ (__m512i)(__v64qi){
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi16(short __w)
 {
-  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w,
- __w, __w, __w, __w, __w, __w, __w, __w };
+  return __extension__ (__m512i)(__v32hi){
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w,
+__w, __w, __w, __w, __w, __w, __w, __w };
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi32(int __s)
 {
-  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
- __s, __s, __s, __s, __s, __s, __s, __s };
+  return __extension__ (__m512i)(__v16si){
+__s, __s, __s, __s, __s, __s, __s, __s,
+__s, __s, __s, __s, __s, __s, __s, __s };
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
@@ -327,7 +330,7 @@ _mm512_maskz_set1_epi32(__mmask16 __M, i
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set1_epi64(long long __d)
 {
-  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
+  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, 
__d };
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
@@ -349,7 +352,7 @@ _mm512_broadcastss_ps(__m128 __A)
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_set4_epi32 (int __A, int __B, int __C, int __D

r333603 - [X86] Use C style comments in intrinsic headers for overall consistency.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 15:33:21 2018
New Revision: 333603

URL: http://llvm.org/viewvc/llvm-project?rev=333603&view=rev
Log:
[X86] Use C style comments in intrinsic headers for overall consistency.

Most of the original comments used C style /* */ comments, but some C++ // 
comments had snuck in over time.

Still need to convert all the doxygen comments, which is much harder to do.

Modified:
cfe/trunk/lib/Headers/avx512erintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/f16cintrin.h
cfe/trunk/lib/Headers/gfniintrin.h
cfe/trunk/lib/Headers/movdirintrin.h
cfe/trunk/lib/Headers/vpclmulqdqintrin.h

Modified: cfe/trunk/lib/Headers/avx512erintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512erintrin.h?rev=333603&r1=333602&r2=333603&view=diff
==
--- cfe/trunk/lib/Headers/avx512erintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512erintrin.h Wed May 30 15:33:21 2018
@@ -27,7 +27,7 @@
 #ifndef __AVX512ERINTRIN_H
 #define __AVX512ERINTRIN_H
 
-// exp2a23
+/* exp2a23 */
 #define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
   (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
   (__v8df)_mm512_setzero_pd(), \
@@ -76,7 +76,7 @@
 #define _mm512_maskz_exp2a23_ps(M, A) \
   _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
 
-// rsqrt28
+/* rsqrt28 */
 #define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
   (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
  (__v8df)_mm512_setzero_pd(), \
@@ -179,7 +179,7 @@
 #define _mm_maskz_rsqrt28_sd(M, A, B) \
   _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
 
-// rcp28
+/* rcp28 */
 #define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
   (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
(__v8df)_mm512_setzero_pd(), \
@@ -282,4 +282,4 @@
 #define _mm_maskz_rcp28_sd(M, A, B) \
   _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
 
-#endif // __AVX512ERINTRIN_H
+#endif /* __AVX512ERINTRIN_H */

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333603&r1=333602&r2=333603&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 15:33:21 2018
@@ -9651,19 +9651,20 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8
   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, 
_mm512_set1_epi64(0x7FFF),(__v8di)__A);
 }
 
-// Vector-reduction arithmetic accepts vectors as inputs and produces scalars 
as
-// outputs. This class of vector operation forms the basis of many scientific
-// computations. In vector-reduction arithmetic, the evaluation off is
-// independent of the order of the input elements of V.
-
-// Used bisection method. At each step, we partition the vector with previous
-// step in half, and the operation is performed on its two halves.
-// This takes log2(n) steps where n is the number of elements in the vector.
-
-// Vec512 - Vector with size 512.
-// Operator - Can be one of following: +,*,&,|
-// T2  - Can get 'i' for int and 'f' for float.
-// T1 - Can get 'i' for int and 'd' for double.
+/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars 
as
+ * outputs. This class of vector operation forms the basis of many scientific
+ * computations. In vector-reduction arithmetic, the evaluation off is
+ * independent of the order of the input elements of V.
+
+ * Used bisection method. At each step, we partition the vector with previous
+ * step in half, and the operation is performed on its two halves.
+ * This takes log2(n) steps where n is the number of elements in the vector.
+
+ * Vec512 - Vector with size 512.
+ * Operator - Can be one of following: +,*,&,|
+ * T2  - Can get 'i' for int and 'f' for float.
+ * T1 - Can get 'i' for int and 'd' for double.
+ */
 
 #define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1) \
   __extension__({  \
@@ -9717,14 +9718,15 @@ static __inline__ double __DEFAULT_FN_AT
   _mm512_reduce_operator_64bit(__W, *, f, d);
 }
 
-// Vec512 - Vector with size 512.
-// Vec512Neutral - All vector elements set to the identity element.
-// Identity element: {+,0},{*,1},{&,0x},{|,0}
-// Operator - Can be one of following: +,*,&,|
-// Mask - Intrinsic Mask
-// T2  - Can get 'i' for int and 'f' for float.
-// T1 - Can get 'i' for int and 'd' for packed double-precision.
-// T3 - Can be Pd for packed double or q for q-word.
+/* Vec512 - Vector with size 512.
+ * Vec512Neutral - All vector elements set to the identity element.
+ * Identity element: {+,0},{*,1},{&,0xF

r333615 - [X86] Fix some places where macro arguments to intrinsics weren't cast to _m512(i|d)/_m256(i|d)/_m128(i|d) first.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 18:24:40 2018
New Revision: 333615

URL: http://llvm.org/viewvc/llvm-project?rev=333615&view=rev
Log:
[X86] Fix some places where macro arguments to intrinsics weren't cast to 
_m512(i|d)/_m256(i|d)/_m128(i|d) first.

The majority of the cases were correct. This fixes the few that weren't.

I also removed some superfluous parentheses in non-macros that confused my 
attempts at grepping for missing casts.

Modified:
cfe/trunk/lib/Headers/__wmmintrin_pclmul.h
cfe/trunk/lib/Headers/avx512dqintrin.h
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vbmi2intrin.h
cfe/trunk/lib/Headers/avx512vldqintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Headers/avx512vlvbmi2intrin.h

Modified: cfe/trunk/lib/Headers/__wmmintrin_pclmul.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/__wmmintrin_pclmul.h?rev=333615&r1=333614&r2=333615&view=diff
==
--- cfe/trunk/lib/Headers/__wmmintrin_pclmul.h (original)
+++ cfe/trunk/lib/Headers/__wmmintrin_pclmul.h Wed May 30 18:24:40 2018
@@ -55,8 +55,8 @@
 ///Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
 /// \returns The 128-bit integer vector containing the result of the carry-less
 ///multiplication of the selected 64-bit values.
-#define _mm_clmulepi64_si128(__X, __Y, __I) \
-  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
-(__v2di)(__m128i)(__Y), (char)(__I)))
+#define _mm_clmulepi64_si128(X, Y, I) \
+  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
+(__v2di)(__m128i)(Y), (char)(I)))
 
 #endif /* __WMMINTRIN_PCLMUL_H */

Modified: cfe/trunk/lib/Headers/avx512dqintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=333615&r1=333614&r2=333615&view=diff
==
--- cfe/trunk/lib/Headers/avx512dqintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed May 30 18:24:40 2018
@@ -1119,7 +1119,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
(__v8sf)_mm512_extractf32x8_ps((A), (imm)), 
\
-   (__v8sf)(W))
+   (__v8sf)(__m256)(W))
 
 #define _mm512_maskz_extractf32x8_ps(U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -1135,7 +1135,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
(__v2df)_mm512_extractf64x2_pd((A), (imm)), 
\
-   (__v2df)(W))
+   (__v2df)(__m128d)(W))
 
 #define _mm512_maskz_extractf64x2_pd(U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1157,7 +1157,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), 
\
-(__v8si)(W))
+(__v8si)(__m256i)(W))
 
 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -1173,7 +1173,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), 
\
-(__v2di)(W))
+(__v2di)(__m128i)(W))
 
 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1203,7 +1203,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_insertf32x8(W, U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
  (__v16sf)_mm512_insertf32x8((A), (B), (imm)), 
\
- (__v16sf)(W))
+ (__v16sf)(__m512)(W))
 
 #define _mm512_maskz_insertf32x8(U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -1225,7 +1225,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __
 #define _mm512_mask_insertf64x2(W, U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
   (__v8df)_mm512_insertf64x2((A), (B), (imm)), 
\
-  (__v8df)(W))
+  (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_insertf64x2(U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -1255,7 +1

r333626 - [X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit equivalents.

2018-05-30 Thread Craig Topper via cfe-commits
Author: ctopper
Date: Wed May 30 22:02:08 2018
New Revision: 333626

URL: http://llvm.org/viewvc/llvm-project?rev=333626&view=rev
Log:
[X86] Make 512-bit unmasked load/store builtins more like their 128/256-bit 
equivalents.

Previously we were just passing a -1 mask to the masked builtin. This changes it 
to the more generic approach that the 128/256-bit versions use.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=333626&r1=333625&r2=333626&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Wed May 30 22:02:08 2018
@@ -4590,10 +4590,10 @@ _mm512_maskz_unpacklo_epi64 (__mmask8 __
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_loadu_si512 (void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
-  (__v16si)
-  _mm512_setzero_si512 (),
-  (__mmask16) -1);
+  struct __loadu_si512 {
+__m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  return ((struct __loadu_si512*)__P)->__v;
 }
 
 static __inline __m512i __DEFAULT_FN_ATTRS
@@ -4686,10 +4686,7 @@ _mm512_loadu_ps(void const *__p)
 static __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_load_ps(void const *__p)
 {
-  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
-  (__v16sf)
-  _mm512_setzero_ps (),
-  (__mmask16) -1);
+  return *(__m512*)__p;
 }
 
 static __inline __m512 __DEFAULT_FN_ATTRS
@@ -4712,10 +4709,7 @@ _mm512_maskz_load_ps(__mmask16 __U, void
 static __inline __m512d __DEFAULT_FN_ATTRS
 _mm512_load_pd(void const *__p)
 {
-  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
-   (__v8df)
-   _mm512_setzero_pd (),
-   (__mmask8) -1);
+  return *(__m512d*)__p;
 }
 
 static __inline __m512d __DEFAULT_FN_ATTRS
@@ -4765,8 +4759,10 @@ _mm512_mask_storeu_epi64(void *__P, __mm
 static __inline void __DEFAULT_FN_ATTRS
 _mm512_storeu_si512 (void *__P, __m512i __A)
 {
-  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
-(__mmask16) -1);
+  struct __storeu_si512 {
+__m512i __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_si512*)__P)->__v = __A;
 }
 
 static __inline void __DEFAULT_FN_ATTRS
@@ -4785,7 +4781,10 @@ _mm512_mask_storeu_pd(void *__P, __mmask
 static __inline void __DEFAULT_FN_ATTRS
 _mm512_storeu_pd(void *__P, __m512d __A)
 {
-  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
+  struct __storeu_pd {
+__m512d __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_pd*)__P)->__v = __A;
 }
 
 static __inline void __DEFAULT_FN_ATTRS
@@ -4798,7 +4797,10 @@ _mm512_mask_storeu_ps(void *__P, __mmask
 static __inline void __DEFAULT_FN_ATTRS
 _mm512_storeu_ps(void *__P, __m512 __A)
 {
-  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
+  struct __storeu_ps {
+__m512 __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __storeu_ps*)__P)->__v = __A;
 }
 
 static __inline void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=333626&r1=333625&r2=333626&view=diff
==
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed May 30 22:02:08 2018
@@ -159,7 +159,7 @@ __m512d test_mm512_mul_pd(__m512d a, __m
 void test_mm512_storeu_si512 (void *__P, __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_storeu_si512
-  // CHECK: store <16 x i32> %{{.*}}, <16 x i32>* %{{.*}}, align 1{{$}}
+  // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 1{{$}}
   // CHECK-NEXT: ret void
   _mm512_storeu_si512 ( __P,__A);
 }
@@ -253,7 +253,7 @@ void test_mm512_mask_storeu_epi64(void *
 __m512i test_mm512_loadu_si512 (void *__P)
 {
   // CHECK-LABEL: @test_mm512_loadu_si512 
-  // CHECK: load <16 x i32>, <16 x i32>* %{{.*}}, align 1{{$}}
+  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 1{{$}}
   return _mm512_loadu_si512 ( __P);
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   3   4   5   6   7   8   9   10   >