[PATCH] D132742: [X86][BF16] Add type mangling for Windows

2022-08-29 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe accepted this revision.
FreddyYe added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132742/new/

https://reviews.llvm.org/D132742

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133094: [X86] Add missing key feature for core2

2022-08-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added a subscriber: hiraditya.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133094

Files:
  clang/test/CodeGen/attr-target-mv.c
  llvm/lib/Support/X86TargetParser.cpp


Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -321,7 +321,7 @@
   { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott },
   { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona },
   // Core microarchitecture based processors.
-  { {"core2"}, CK_Core2, ~0U, FeaturesCore2 },
+  { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2 },
   { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn },
   // Atom processors
   { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -14,6 +14,7 @@
 int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;}
 int __attribute__((target("arch=alderlake"))) foo(void) {return 11;}
 int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;}
+int __attribute__((target("arch=core2"))) foo(void) {return 13;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
@@ -146,6 +147,8 @@
 // LINUX: ret i32 11
 // LINUX: define{{.*}} i32 @foo.arch_rocketlake()
 // LINUX: ret i32 12
+// LINUX: define{{.*}} i32 @foo.arch_core2()
+// LINUX: ret i32 13
 // LINUX: define{{.*}} i32 @foo()
 // LINUX: ret i32 2
 // LINUX: define{{.*}} i32 @bar()
@@ -175,6 +178,8 @@
 // WINDOWS: ret i32 11
 // WINDOWS: define dso_local i32 @foo.arch_rocketlake()
 // WINDOWS: ret i32 12
+// WINDOWS: define dso_local i32 @foo.arch_core2()
+// WINDOWS: ret i32 13
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -321,7 +321,7 @@
   { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott },
   { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona },
   // Core microarchitecture based processors.
-  { {"core2"}, CK_Core2, ~0U, FeaturesCore2 },
+  { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2 },
   { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn },
   // Atom processors
   { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -14,6 +14,7 @@
 int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;}
 int __attribute__((target("arch=alderlake"))) foo(void) {return 11;}
 int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;}
+int __attribute__((target("arch=core2"))) foo(void) {return 13;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
@@ -146,6 +147,8 @@
 // LINUX: ret i32 11
 // LINUX: define{{.*}} i32 @foo.arch_rocketlake()
 // LINUX: ret i32 12
+// LINUX: define{{.*}} i32 @foo.arch_core2()
+// LINUX: ret i32 13
 // LINUX: define{{.*}} i32 @foo()
 // LINUX: ret i32 2
 // LINUX: define{{.*}} i32 @bar()
@@ -175,6 +178,8 @@
 // WINDOWS: ret i32 11
 // WINDOWS: define dso_local i32 @foo.arch_rocketlake()
 // WINDOWS: ret i32 12
+// WINDOWS: define dso_local i32 @foo.arch_core2()
+// WINDOWS: ret i32 13
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D133094: [X86] Add missing key feature for core2

2022-09-02 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG66f332bc1ac0: [X86] Add missing key feature for core2 
(authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D133094/new/

https://reviews.llvm.org/D133094

Files:
  clang/test/CodeGen/attr-target-mv.c
  llvm/lib/Support/X86TargetParser.cpp


Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -321,7 +321,7 @@
   { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott },
   { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona },
   // Core microarchitecture based processors.
-  { {"core2"}, CK_Core2, ~0U, FeaturesCore2 },
+  { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2 },
   { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn },
   // Atom processors
   { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -14,6 +14,7 @@
 int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;}
 int __attribute__((target("arch=alderlake"))) foo(void) {return 11;}
 int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;}
+int __attribute__((target("arch=core2"))) foo(void) {return 13;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
@@ -146,6 +147,8 @@
 // LINUX: ret i32 11
 // LINUX: define{{.*}} i32 @foo.arch_rocketlake()
 // LINUX: ret i32 12
+// LINUX: define{{.*}} i32 @foo.arch_core2()
+// LINUX: ret i32 13
 // LINUX: define{{.*}} i32 @foo()
 // LINUX: ret i32 2
 // LINUX: define{{.*}} i32 @bar()
@@ -175,6 +178,8 @@
 // WINDOWS: ret i32 11
 // WINDOWS: define dso_local i32 @foo.arch_rocketlake()
 // WINDOWS: ret i32 12
+// WINDOWS: define dso_local i32 @foo.arch_core2()
+// WINDOWS: ret i32 13
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -321,7 +321,7 @@
   { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott },
   { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona },
   // Core microarchitecture based processors.
-  { {"core2"}, CK_Core2, ~0U, FeaturesCore2 },
+  { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2 },
   { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn },
   // Atom processors
   { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -14,6 +14,7 @@
 int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;}
 int __attribute__((target("arch=alderlake"))) foo(void) {return 11;}
 int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;}
+int __attribute__((target("arch=core2"))) foo(void) {return 13;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
@@ -146,6 +147,8 @@
 // LINUX: ret i32 11
 // LINUX: define{{.*}} i32 @foo.arch_rocketlake()
 // LINUX: ret i32 12
+// LINUX: define{{.*}} i32 @foo.arch_core2()
+// LINUX: ret i32 13
 // LINUX: define{{.*}} i32 @foo()
 // LINUX: ret i32 2
 // LINUX: define{{.*}} i32 @bar()
@@ -175,6 +178,8 @@
 // WINDOWS: ret i32 11
 // WINDOWS: define dso_local i32 @foo.arch_rocketlake()
 // WINDOWS: ret i32 12
+// WINDOWS: define dso_local i32 @foo.arch_core2()
+// WINDOWS: ret i32 13
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D130964: Enable __bf16 for x86 targets.

2022-08-01 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: luke957, pengfei, s.egerton, simoncook.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, pcwang-thead.
Herald added projects: clang, LLVM.

The X86 psABI has been updated to support the __bf16 type, whose ABI is the
same as that of FP16. See
https://discourse.llvm.org/t/patch-add-optional-bfloat16-support/63149


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130964

Files:
  clang/docs/LanguageExtensions.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/X86/bfloat-abi.c
  clang/test/CodeGen/X86/bfloat-half-abi.c
  clang/test/CodeGen/X86/bfloat-mangle.cpp
  clang/test/Sema/vector-decl-crash.c
  llvm/include/llvm/IR/Type.h

Index: llvm/include/llvm/IR/Type.h
===
--- llvm/include/llvm/IR/Type.h
+++ llvm/include/llvm/IR/Type.h
@@ -144,6 +144,11 @@
   /// Return true if this is 'bfloat', a 16-bit bfloat type.
   bool isBFloatTy() const { return getTypeID() == BFloatTyID; }
 
+  /// Return true if this is a 16-bit float type.
+  bool is16bitFPTy() const {
+return getTypeID() == BFloatTyID || getTypeID() == HalfTyID;
+  }
+
   /// Return true if this is 'float', a 32-bit IEEE fp type.
   bool isFloatTy() const { return getTypeID() == FloatTyID; }
 
Index: clang/test/Sema/vector-decl-crash.c
===
--- clang/test/Sema/vector-decl-crash.c
+++ clang/test/Sema/vector-decl-crash.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -fsyntax-only -verify -triple x86_64-unknown-unknown
+// RUN: %clang_cc1 %s -fsyntax-only -verify -triple riscv64-unknown-unknown
 
 // GH50171
 // This would previously crash when __bf16 was not a supported type.
Index: clang/test/CodeGen/X86/bfloat-mangle.cpp
===
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat-mangle.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: define {{.*}}void @_Z3foou6__bf16(bfloat noundef %b)
+void foo(__bf16 b) {}
Index: clang/test/CodeGen/X86/bfloat-half-abi.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/bfloat-half-abi.c
@@ -0,0 +1,149 @@
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK
+
+struct bfloat1 {
+  __bf16 a;
+};
+
+struct bfloat1 h1(__bf16 a) {
+  // CHECK: define{{.*}}bfloat @
+  struct bfloat1 x;
+  x.a = a;
+  return x;
+}
+
+struct bfloat2 {
+  __bf16 a;
+  __bf16 b;
+};
+
+struct bfloat2 h2(__bf16 a, __bf16 b) {
+  // CHECK: define{{.*}}<2 x bfloat> @
+  struct bfloat2 x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat3 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct bfloat3 h3(__bf16 a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloat4 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct bfloat4 h4(__bf16 a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat4 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct floatbfloat {
+  float a;
+  __bf16 b;
+};
+
+struct floatbfloat fh(float a, __bf16 b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct floatbfloat2 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+};
+
+struct floatbfloat2 fh2(float a, __bf16 b, __bf16 c) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct floatbfloat2 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct bfloatfloat {
+  __bf16 a;
+  float b;
+};
+
+struct bfloatfloat hf(__bf16 a, float b) {
+  // CHECK: define{{.*}}<4 x half> @
+  struct bfloatfloat x;
+  x.a = a;
+  x.b = b;
+  return x;
+}
+
+struct bfloat2float {
+  __bf16 a;
+  __bf16 b;
+  float c;
+};
+
+struct bfloat2float h2f(__bf16 a, __bf16 b, float c) {
+  // CHECK: define{{.*}}<4 x bfloat> @
+  struct bfloat2float x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  return x;
+}
+
+struct floatbfloat3 {
+  float a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+};
+
+struct floatbfloat3 fh3(float a, __bf16 b, __bf16 c, __bf16 d) {
+  // CHECK: define{{.*}}{ <4 x half>, bfloat } @
+  struct floatbfloat3 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  return x;
+}
+
+struct bfloat5 {
+  __bf16 a;
+  __bf16 b;
+  __bf16 c;
+  __bf16 d;
+  __bf16 e;
+};
+
+struct bfloat5 h5(__bf16 a, __bf16 b, __bf16 c, __bf16 d, __bf16 e) {
+  // CHECK: define{{.*}}{ <4 x bfloat>, bfloat } @
+  struct bfloat5 x;
+  x.a = a;
+  x.b = b;
+  x.c = c;
+  x.d = d;
+  x.e =

[PATCH] D130964: [X86][BF16] Enable __bf16 for x86 targets.

2022-08-02 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D130964#3694540, @bkramer wrote:

> In D130964#3694473, @rjmccall wrote:
>
>> How are you actually implementing `__bf16` on these targets?  There isn't 
>> even hardware support for conversions.
>
> `bf16` -> `float` is really just a bit shift. The other direction gets 
> lowered to a libcall, compiler-rt has a conversion function with proper 
> rounding. I added some support to make the backend promote all other 
> arithmetic to float, but I think that's only enabled on x86 so far.

Yes, we can consider the x86 backend to be handling `__bf16` already. And with
https://reviews.llvm.org/D130832, it will completely follow the psABI. As for
hardware support, x86 has actually supported bf16 since AVX512BF16
(https://reviews.llvm.org/D60552), which provides vector conversions between
float and bf16. However, at that time we chose a `typedef short` as the C type.
In the future, we can support backend lowering for these instructions:
VCVTNE2PS2BF16, VCVTNEPS2BF16 and VDPBF16PS.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D130964/new/

https://reviews.llvm.org/D130964

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D130964: [X86][BF16] Enable __bf16 for x86 targets.

2022-08-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

> Right, but this patch is adding x86 support whenever SSE2 is available.  
> AVX512BF16 is available on a *very* small slice of processors.  In contrast, 
> e.g. F16C is relatively broadly available, although I understand that we 
> formally support `_Float16` all the way back to SSE2 and thus on some 
> processors that lack F16C.
>
> But okay, pure intrinsic support is fine if that's what we're doing.
>
> I think the patch looks fine.

Yes. This type is for pure intrinsic support. Thanks for your review. Let's 
wait for the backend patch to land first.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D130964/new/

https://reviews.llvm.org/D130964

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define{{.*}} i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define{{.*}} i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define{{.*}} i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define{{.*}} i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define{{.*}} i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define{{.*}} i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define{{.*}} i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define{{.*}} i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define{{.*}} i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define{{.*}} i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 361908.
FreddyYe added a comment.

fix


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-27 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 361919.
FreddyYe added a comment.

lowering pre-merge issues.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,8 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,8 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-27 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 361935.
FreddyYe added a comment.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-28 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 362596.
FreddyYe added a comment.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-28 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG58712987e56f: [NFC][X86] add missing tests in 
clang/test/CodeGen/attr-target-mv.c (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

Files:
  clang/test/CodeGen/attr-target-mv.c


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()


Index: clang/test/CodeGen/attr-target-mv.c
===
--- clang/test/CodeGen/attr-target-mv.c
+++ clang/test/CodeGen/attr-target-mv.c
@@ -129,6 +129,16 @@
 // WINDOWS: ret i32 6
 // WINDOWS: define dso_local i32 @foo.arch_icelake-server()
 // WINDOWS: ret i32 7
+// WINDOWS: define dso_local i32 @foo.arch_cooperlake()
+// WINDOWS: ret i32 8
+// WINDOWS: define dso_local i32 @foo.arch_tigerlake()
+// WINDOWS: ret i32 9
+// WINDOWS: define dso_local i32 @foo.arch_sapphirerapids()
+// WINDOWS: ret i32 10
+// WINDOWS: define dso_local i32 @foo.arch_alderlake()
+// WINDOWS: ret i32 11
+// WINDOWS: define dso_local i32 @foo.arch_rocketlake()
+// WINDOWS: ret i32 12
 // WINDOWS: define dso_local i32 @foo()
 // WINDOWS: ret i32 2
 // WINDOWS: define dso_local i32 @bar()
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D86503: [X86] Support -march=sapphirerapids

2020-08-24 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
FreddyYe added reviewers: craig.topper, pengfei, LuoYuanke.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits, hiraditya.
Herald added projects: clang, Sanitizers, LLVM.
FreddyYe requested review of this revision.

Support -march=sapphirerapids for x86.
Compared with Icelake Server, it adds 14 new features:
amxtile, amxint8, amxbf16, avx512bf16, avx512vp2intersect, cldemote,
enqcmd, movdir64b, movdiri, ptwrite, serialize, shstk, tsxldtrk, waitpkg.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86503

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -40,6 +40,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=icelake-client 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=icelake-server 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tigerlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sapphirerapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=atom 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -741,6 +741,25 @@
   list TGLFeatures =
 !listconcat(ICLFeatures, TGLAdditionalFeatures );
 
+  //Sapphirerapids
+  list SPRAdditionalFeatures = [FeatureAMXTILE,
+  FeatureAMXINT8,
+  FeatureAMXBF16,
+  FeatureBF16,
+  FeatureSERIALIZE,
+  FeatureCLDEMOTE,
+  FeatureWAITPKG,
+  FeaturePTWRITE,
+  FeatureTSXLDTRK,
+  FeatureENQCMD,
+  FeatureSHSTK,
+  FeatureVP2INTERSECT,
+  FeatureMOVDIRI,
+  FeatureMOVDIR64B];
+  list SPRTuning = ICXTuning;
+  list SPRFeatures =
+!listconcat(ICXFeatures, SPRAdditionalFeatures);
+
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCMPXCHG8B,
@@ -1237,6 +1256,8 @@
 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
 def : ProcModel<"tigerlake", SkylakeServerModel,
 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
+def : ProcModel<"sapphirerapids", SkylakeServerModel,
+ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 
 // AMD CPUs.
 
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -195,6 +195,11 @@
 static constexpr FeatureBitset FeaturesTigerlake =
 FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
 FeatureMOVDIRI | FeatureSHSTK;
+static constexpr FeatureBitset FeaturesSapphireRapids =
+FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
+FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE | FeatureENQCMD |
+FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE |
+FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -342,6 +347,8 @@
   { {"icelake-server"}, CK_IcelakeSe

[PATCH] D86503: [X86] Support -march=sapphirerapids

2020-08-24 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe02d081f2b60: [X86] Support -march=sapphirerapids (authored 
by FreddyYe).

Changed prior to commit:
  https://reviews.llvm.org/D86503?vs=287547&id=287561#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86503/new/

https://reviews.llvm.org/D86503

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -40,6 +40,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=icelake-client 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=icelake-server 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tigerlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sapphirerapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=atom 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -741,6 +741,25 @@
   list TGLFeatures =
 !listconcat(ICLFeatures, TGLAdditionalFeatures );
 
+  //Sapphirerapids
+  list SPRAdditionalFeatures = [FeatureAMXTILE,
+  FeatureAMXINT8,
+  FeatureAMXBF16,
+  FeatureBF16,
+  FeatureSERIALIZE,
+  FeatureCLDEMOTE,
+  FeatureWAITPKG,
+  FeaturePTWRITE,
+  FeatureTSXLDTRK,
+  FeatureENQCMD,
+  FeatureSHSTK,
+  FeatureVP2INTERSECT,
+  FeatureMOVDIRI,
+  FeatureMOVDIR64B];
+  list SPRTuning = ICXTuning;
+  list SPRFeatures =
+!listconcat(ICXFeatures, SPRAdditionalFeatures);
+
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCMPXCHG8B,
@@ -1243,6 +1262,8 @@
 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
 def : ProcModel<"tigerlake", SkylakeServerModel,
 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
+def : ProcModel<"sapphirerapids", SkylakeServerModel,
+ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 
 // AMD CPUs.
 
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -195,6 +195,11 @@
 static constexpr FeatureBitset FeaturesTigerlake =
 FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
 FeatureMOVDIRI | FeatureSHSTK;
+static constexpr FeatureBitset FeaturesSapphireRapids =
+FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
+FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE | FeatureENQCMD |
+FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE |
+FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -342,6 +347,8 @@
   { {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer },
   // Tigerlake microarchitecture based processors.
   { {"tigerlake"}, CK_Tigerlake, FEATURE_

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
FreddyYe requested review of this revision.

The Tremont microarchitecture only supports the SSE version of GFNI, not the
AVX and AVX512 versions. This patch avoids compilation failures on Windows when
using -march=tremont to invoke one of the GFNI (SSE) intrinsics.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D90822

Files:
  clang/lib/Headers/gfniintrin.h

Index: clang/lib/Headers/gfniintrin.h
===
--- clang/lib/Headers/gfniintrin.h
+++ clang/lib/Headers/gfniintrin.h
@@ -20,22 +20,27 @@
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
+#ifdef __AVX__
+#ifdef __AVX512BW__
+#ifdef __AVX512VL__
 #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
   (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
 (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),  \
 (__v16qi)(__m128i)(S))
 
-
 #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),   \
 U, A, B, I)
-
+#endif /* __AVX512VL__ */
+#endif /* __AVX512BW__ */
 
 #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
+#ifdef __AVX512BW__
+#ifdef __AVX512VL__
 #define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
 (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),   \
@@ -44,13 +49,14 @@
 #define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
 U, A, B, I)
-
+#endif
 
 #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),  \
   (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#ifdef __AVX512VL__
 #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U),\
 (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I),   \
@@ -59,12 +65,18 @@
 #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(),\
 U, A, B, I)
+#endif /* __AVX512VL__ */
+#endif /* __AVX512BW__ */
+#endif /* __AVX__ */
 
 #define _mm_gf2p8affine_epi64_epi8(A, B, I) \
   (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
+#ifdef __AVX__
+#ifdef __AVX512BW__
+#ifdef __AVX512VL__
 #define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
   (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
 (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
@@ -74,13 +86,16 @@
 #define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(),  \
 U, A, B, I)
-
+#endif /* __AVX512VL__ */
+#endif /* __AVX512BW__ */
 
 #define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
+#ifdef __AVX512BW__
+#ifdef __AVX512VL__
 #define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
 (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I),  \
@@ -89,13 +104,14 @@
 #define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(),\
 U, A, B, I)
-
+#endif /* __AVX512VL__ */
 
 #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
   (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#ifdef __AVX512VL__
 #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U),\
 (__v64qi)_mm512_gf2p8affine_epi64

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

The failures are all unknown-type errors on Windows, since those typedefs are 
declared in other header files.
The error messages look like this:

  $ clang -march=tremont gfni.c
  ..
  ...\lib\clang\12.0.0\include\gfniintrin.h:129:37: error:
unknown type name '__mmask16'
  _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
  ^
  ...\lib\clang\12.0.0\include\gfniintrin.h:137:25: error:
unknown type name '__mmask16'
  _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
  ^
  ..
  ...\lib\clang\12.0.0\include\gfniintrin.h:159:43: error:
unknown type name '__m256i'
  _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
^
  ...
  fatal error: too many errors emitted, stopping now [-ferror-limit=]
  20 errors generated.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D90822#2375423 , @pengfei wrote:

> Craig's method sounds good.
> @FreddyYe, why do we check AVX512BW instead of AVX512F? The SDM says it 
> depends on AVX512F.

I was following the old implementation and test case. It seems to be an error 
that was introduced earlier.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 303047.
FreddyYe added a comment.

Use header file macros instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

Files:
  clang/lib/Headers/gfniintrin.h

Index: clang/lib/Headers/gfniintrin.h
===
--- clang/lib/Headers/gfniintrin.h
+++ clang/lib/Headers/gfniintrin.h
@@ -20,22 +20,27 @@
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
+#ifdef __AVXINTRIN_H
+#ifdef __AVX512BWINTRIN_H
+#ifdef __AVX512VLINTRIN_H
 #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
   (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
 (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),  \
 (__v16qi)(__m128i)(S))
 
-
 #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),   \
 U, A, B, I)
-
+#endif /* __AVX512VLINTRIN_H */
+#endif /* __AVX512BWINTRIN_H */
 
 #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
+#ifdef __AVX512BWINTRIN_H
+#ifdef __AVX512VLINTRIN_H
 #define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
 (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),   \
@@ -44,13 +49,14 @@
 #define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
 U, A, B, I)
-
+#endif
 
 #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),  \
   (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#ifdef __AVX512VLINTRIN_H
 #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U),\
 (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I),   \
@@ -59,12 +65,18 @@
 #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(),\
 U, A, B, I)
+#endif /* __AVX512VLINTRIN_H */
+#endif /* __AVX512BWINTRIN_H */
+#endif /* __AVXINTRIN_H */
 
 #define _mm_gf2p8affine_epi64_epi8(A, B, I) \
   (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
+#ifdef __AVXINTRIN_H
+#ifdef __AVX512BWINTRIN_H
+#ifdef __AVX512VLINTRIN_H
 #define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
   (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
 (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
@@ -74,13 +86,16 @@
 #define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(),  \
 U, A, B, I)
-
+#endif /* __AVX512VLINTRIN_H */
+#endif /* __AVX512BWINTRIN_H */
 
 #define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
+#ifdef __AVX512BWINTRIN_H
+#ifdef __AVX512VLINTRIN_H
 #define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
 (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I),  \
@@ -89,13 +104,14 @@
 #define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(),\
 U, A, B, I)
-
+#endif /* __AVX512VLINTRIN_H */
 
 #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
   (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#ifdef __AVX512VLINTRIN_H
 #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U),\
 (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),  \
@@ -104,

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 303073.
FreddyYe added a comment.

Reorganize the order of the intrinsics to avoid using nested macros.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

Files:
  clang/lib/Headers/gfniintrin.h

Index: clang/lib/Headers/gfniintrin.h
===
--- clang/lib/Headers/gfniintrin.h
+++ clang/lib/Headers/gfniintrin.h
@@ -14,28 +14,84 @@
 #ifndef __GFNIINTRIN_H
 #define __GFNIINTRIN_H
 
+/* Default attributes for simple form (no masking). */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
+
+/* Default attributes for YMM unmasked form. */
+#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
+
+/* Default attributes for ZMM forms. */
+#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+
+/* Default attributes for VLX forms. */
+#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
 
 #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A),  \
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
+#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
+  (__v16qi)(__m128i)(B),  \
+  (char)(I))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
+  (__v16qi) __B);
+}
+
+#ifdef __AVXINTRIN_H
+#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
+  (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
+  (__v32qi)(__m256i)(B),  \
+  (char)(I))
+
+#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
+  (__v32qi)(__m256i)(B),  \
+  (char)(I))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
+_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
+  (__v32qi) __B);
+}
+#endif /* __AVXINTRIN_H */
+
+#ifdef __AVX512BWINTRIN_H
+#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
+  (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),  \
+  (__v64qi)(__m512i)(B),  \
+  (char)(I))
+
+#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
+  (__v64qi)(__m512i)(B),  \
+  (char)(I))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
+_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
+  (__v64qi) __B);
+}
+#endif /* __AVX512BWINTRIN_H */
+
+#ifdef __AVX512VLBWINTRIN_H
 #define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
   (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
 (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),  \
 (__v16qi)(__m128i)(S))
 
-
 #define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),   \
 U, A, B, I)
 
-
-#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
-  (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
-  (__v32qi)(__m256i)(B),  \
-  (char)(I))
-
 #define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
 (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),   \
@@ -45,12 +101,6 @@
   (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
 U, A, B, I)
 
-
-#define _mm512_gf2p8affineinv_

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 3 inline comments as done.
FreddyYe added inline comments.



Comment at: clang/lib/Headers/gfniintrin.h:131-138
 #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), 
   \
 (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),   
   \
 (__v64qi)(__m512i)(S))
 
 #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), 
  \

pengfei wrote:
> These 2 functions need to move to __AVX512BWINTRIN_H
Thanks for review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 303087.
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

Refine


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

Files:
  clang/lib/Headers/gfniintrin.h

Index: clang/lib/Headers/gfniintrin.h
===
--- clang/lib/Headers/gfniintrin.h
+++ clang/lib/Headers/gfniintrin.h
@@ -14,38 +14,56 @@
 #ifndef __GFNIINTRIN_H
 #define __GFNIINTRIN_H
 
+/* Default attributes for simple form (no masking). */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
+
+/* Default attributes for YMM unmasked form. */
+#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
+
+/* Default attributes for ZMM forms. */
+#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+
+/* Default attributes for VLX forms. */
+#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
 
 #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A),  \
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
-#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
-(__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),  \
-(__v16qi)(__m128i)(S))
-
-
-#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
-  (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),   \
-U, A, B, I)
+#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
+  (__v16qi)(__m128i)(B),  \
+  (char)(I))
 
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
+  (__v16qi) __B);
+}
 
+#ifdef __AVXINTRIN_H
 #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
-#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
-   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
-(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),   \
-(__v32qi)(__m256i)(S))
-
-#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
-  (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
-U, A, B, I)
+#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
+  (__v32qi)(__m256i)(B),  \
+  (char)(I))
 
+static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
+_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
+  (__v32qi) __B);
+}
+#endif /* __AVXINTRIN_H */
 
+#ifdef __AVX512BWINTRIN_H
 #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),  \
   (__v64qi)(__m512i)(B),  \
@@ -60,27 +78,71 @@
   (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(),\
 U, A, B, I)
 
-#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
-  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
-  (__v16qi)(__m128i)(B),  \
+#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
+  (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
+   (__m512i)__builtin_ia32_selectb_512((__mmask64)(U),\
+(__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),  \
+  

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-06 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5e312e004197: [X86] use macros to split GFNI intrinsics into 
different kinds (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

Files:
  clang/lib/Headers/gfniintrin.h

Index: clang/lib/Headers/gfniintrin.h
===
--- clang/lib/Headers/gfniintrin.h
+++ clang/lib/Headers/gfniintrin.h
@@ -14,38 +14,56 @@
 #ifndef __GFNIINTRIN_H
 #define __GFNIINTRIN_H
 
+/* Default attributes for simple form (no masking). */
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
+
+/* Default attributes for YMM unmasked form. */
+#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
+
+/* Default attributes for ZMM forms. */
+#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
+
+/* Default attributes for VLX forms. */
+#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
 
 #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A),  \
   (__v16qi)(__m128i)(B),  \
   (char)(I))
 
-#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
-(__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I),  \
-(__v16qi)(__m128i)(S))
-
-
-#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
-  (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(),   \
-U, A, B, I)
+#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
+  (__v16qi)(__m128i)(B),  \
+  (char)(I))
 
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
+  (__v16qi) __B);
+}
 
+#ifdef __AVXINTRIN_H
 #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A),  \
   (__v32qi)(__m256i)(B),  \
   (char)(I))
 
-#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
-   (__m256i)__builtin_ia32_selectb_256((__mmask32)(U),\
-(__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I),   \
-(__v32qi)(__m256i)(S))
-
-#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
-  (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
-U, A, B, I)
+#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
+  (__v32qi)(__m256i)(B),  \
+  (char)(I))
 
+static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
+_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
+  (__v32qi) __B);
+}
+#endif /* __AVXINTRIN_H */
 
+#ifdef __AVX512BWINTRIN_H
 #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
   (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A),  \
   (__v64qi)(__m512i)(B),  \
@@ -60,27 +78,71 @@
   (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(),\
 U, A, B, I)
 
-#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
-  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
-  (__v16qi)(__m128i)(B),  \
+#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
+  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
+  (__v64qi)(__m512i)(B),  \
   (char)(I))
 
+#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
+   (__m512i)__builtin_ia32_selectb_512((__mmask64)

[PATCH] D95421: [NFC] Refine some uninitialized used variables.

2021-01-25 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: pengfei, tpr, hiraditya, qcolombet, MatzeB.
FreddyYe requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

These warnings are reported by the static code analysis tool Klocwork.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95421

Files:
  clang/lib/CodeGen/CGBlocks.cpp
  llvm/lib/CodeGen/RegisterCoalescer.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp

Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13830,7 +13830,7 @@
   assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
 
   // Attempt to match the insertps pattern.
-  unsigned InsertPSMask;
+  unsigned InsertPSMask = 0;
   if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
 return SDValue();
 
Index: llvm/lib/CodeGen/RegisterCoalescer.cpp
===
--- llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -442,7 +442,7 @@
   Flipped = CrossClass = false;
 
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
   Partial = SrcSub || DstSub;
@@ -537,7 +537,7 @@
   if (!MI)
 return false;
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
 
@@ -1590,7 +1590,7 @@
   // CoalescerPair may have a new register class with adjusted subreg indices
   // at this point.
   Register SrcReg, DstReg;
-  unsigned SrcSubIdx, DstSubIdx;
+  unsigned SrcSubIdx = 0, DstSubIdx = 0;
   if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
 return nullptr;
 
@@ -1966,7 +1966,7 @@
 if (!canJoinPhys(CP)) {
   // Before giving up coalescing, if definition of source is defined by
   // trivial computation, try rematerializing it.
-  bool IsDefCopy;
+  bool IsDefCopy = false;
   if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
 return true;
   if (IsDefCopy)
@@ -2005,7 +2005,7 @@
 
 // If definition of source is defined by trivial computation, try
 // rematerializing it.
-bool IsDefCopy;
+bool IsDefCopy = false;
 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   return true;
 
@@ -3798,7 +3798,7 @@
   if (!UseTerminalRule)
 return false;
   Register SrcReg, DstReg;
-  unsigned SrcSubReg, DstSubReg;
+  unsigned SrcSubReg = 0, DstSubReg = 0;
   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
 return false;
   // Check if the destination of this copy has any other affinity.
@@ -3823,7 +3823,7 @@
 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
   continue;
 Register OtherSrcReg, OtherReg;
-unsigned OtherSrcSubReg, OtherSubReg;
+unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
 OtherSubReg))
   return false;
Index: clang/lib/CodeGen/CGBlocks.cpp
===
--- clang/lib/CodeGen/CGBlocks.cpp
+++ clang/lib/CodeGen/CGBlocks.cpp
@@ -2697,7 +2697,7 @@
   }
 
   bool HasByrefExtendedLayout = false;
-  Qualifiers::ObjCLifetime Lifetime;
+  Qualifiers::ObjCLifetime Lifetime = Qualifiers::OCL_None;;
   if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) &&
   HasByrefExtendedLayout) {
 /// void *__byref_variable_layout;
@@ -2767,8 +2767,8 @@
   const VarDecl &D = *emission.Variable;
   QualType type = D.getType();
 
-  bool HasByrefExtendedLayout;
-  Qualifiers::ObjCLifetime ByrefLifetime;
+  bool HasByrefExtendedLayout = false;
+  Qualifiers::ObjCLifetime ByrefLifetime = Qualifiers::OCL_None;
   bool ByRefHasLifetime =
 getContext().getByrefLifetime(type, ByrefLifetime, HasByrefExtendedLayout);
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95421: [NFC] Refine some uninitialized used variables.

2021-01-25 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 319216.
FreddyYe added a comment.

refine according to Lint


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95421/new/

https://reviews.llvm.org/D95421

Files:
  clang/lib/CodeGen/CGBlocks.cpp
  llvm/lib/CodeGen/RegisterCoalescer.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp

Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13830,7 +13830,7 @@
   assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
 
   // Attempt to match the insertps pattern.
-  unsigned InsertPSMask;
+  unsigned InsertPSMask = 0;
   if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
 return SDValue();
 
Index: llvm/lib/CodeGen/RegisterCoalescer.cpp
===
--- llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -442,7 +442,7 @@
   Flipped = CrossClass = false;
 
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
   Partial = SrcSub || DstSub;
@@ -537,7 +537,7 @@
   if (!MI)
 return false;
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
 
@@ -1590,7 +1590,7 @@
   // CoalescerPair may have a new register class with adjusted subreg indices
   // at this point.
   Register SrcReg, DstReg;
-  unsigned SrcSubIdx, DstSubIdx;
+  unsigned SrcSubIdx = 0, DstSubIdx = 0;
   if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
 return nullptr;
 
@@ -1966,7 +1966,7 @@
 if (!canJoinPhys(CP)) {
   // Before giving up coalescing, if definition of source is defined by
   // trivial computation, try rematerializing it.
-  bool IsDefCopy;
+  bool IsDefCopy = false;
   if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
 return true;
   if (IsDefCopy)
@@ -2005,7 +2005,7 @@
 
 // If definition of source is defined by trivial computation, try
 // rematerializing it.
-bool IsDefCopy;
+bool IsDefCopy = false;
 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   return true;
 
@@ -3798,7 +3798,7 @@
   if (!UseTerminalRule)
 return false;
   Register SrcReg, DstReg;
-  unsigned SrcSubReg, DstSubReg;
+  unsigned SrcSubReg = 0, DstSubReg = 0;
   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
 return false;
   // Check if the destination of this copy has any other affinity.
@@ -3823,7 +3823,7 @@
 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
   continue;
 Register OtherSrcReg, OtherReg;
-unsigned OtherSrcSubReg, OtherSubReg;
+unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
 OtherSubReg))
   return false;
Index: clang/lib/CodeGen/CGBlocks.cpp
===
--- clang/lib/CodeGen/CGBlocks.cpp
+++ clang/lib/CodeGen/CGBlocks.cpp
@@ -2697,7 +2697,7 @@
   }
 
   bool HasByrefExtendedLayout = false;
-  Qualifiers::ObjCLifetime Lifetime;
+  Qualifiers::ObjCLifetime Lifetime = Qualifiers::OCL_None;
   if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) &&
   HasByrefExtendedLayout) {
 /// void *__byref_variable_layout;
@@ -2767,8 +2767,8 @@
   const VarDecl &D = *emission.Variable;
   QualType type = D.getType();
 
-  bool HasByrefExtendedLayout;
-  Qualifiers::ObjCLifetime ByrefLifetime;
+  bool HasByrefExtendedLayout = false;
+  Qualifiers::ObjCLifetime ByrefLifetime = Qualifiers::OCL_None;
   bool ByRefHasLifetime =
 getContext().getByrefLifetime(type, ByrefLifetime, HasByrefExtendedLayout);
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-25 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

Ping?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95421: [NFC] Refine some uninitialized used variables.

2021-01-26 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb3b0acdc6fb5: [NFC] Refine some uninitialized used 
variables. (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95421/new/

https://reviews.llvm.org/D95421

Files:
  clang/lib/CodeGen/CGBlocks.cpp
  llvm/lib/CodeGen/RegisterCoalescer.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp

Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13830,7 +13830,7 @@
   assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
 
   // Attempt to match the insertps pattern.
-  unsigned InsertPSMask;
+  unsigned InsertPSMask = 0;
   if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
 return SDValue();
 
Index: llvm/lib/CodeGen/RegisterCoalescer.cpp
===
--- llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -442,7 +442,7 @@
   Flipped = CrossClass = false;
 
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
   Partial = SrcSub || DstSub;
@@ -537,7 +537,7 @@
   if (!MI)
 return false;
   Register Src, Dst;
-  unsigned SrcSub, DstSub;
+  unsigned SrcSub = 0, DstSub = 0;
   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
 return false;
 
@@ -1590,7 +1590,7 @@
   // CoalescerPair may have a new register class with adjusted subreg indices
   // at this point.
   Register SrcReg, DstReg;
-  unsigned SrcSubIdx, DstSubIdx;
+  unsigned SrcSubIdx = 0, DstSubIdx = 0;
   if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
 return nullptr;
 
@@ -1966,7 +1966,7 @@
 if (!canJoinPhys(CP)) {
   // Before giving up coalescing, if definition of source is defined by
   // trivial computation, try rematerializing it.
-  bool IsDefCopy;
+  bool IsDefCopy = false;
   if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
 return true;
   if (IsDefCopy)
@@ -2005,7 +2005,7 @@
 
 // If definition of source is defined by trivial computation, try
 // rematerializing it.
-bool IsDefCopy;
+bool IsDefCopy = false;
 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   return true;
 
@@ -3798,7 +3798,7 @@
   if (!UseTerminalRule)
 return false;
   Register SrcReg, DstReg;
-  unsigned SrcSubReg, DstSubReg;
+  unsigned SrcSubReg = 0, DstSubReg = 0;
   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
 return false;
   // Check if the destination of this copy has any other affinity.
@@ -3823,7 +3823,7 @@
 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
   continue;
 Register OtherSrcReg, OtherReg;
-unsigned OtherSrcSubReg, OtherSubReg;
+unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
 OtherSubReg))
   return false;
Index: clang/lib/CodeGen/CGBlocks.cpp
===
--- clang/lib/CodeGen/CGBlocks.cpp
+++ clang/lib/CodeGen/CGBlocks.cpp
@@ -2697,7 +2697,7 @@
   }
 
   bool HasByrefExtendedLayout = false;
-  Qualifiers::ObjCLifetime Lifetime;
+  Qualifiers::ObjCLifetime Lifetime = Qualifiers::OCL_None;
   if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) &&
   HasByrefExtendedLayout) {
 /// void *__byref_variable_layout;
@@ -2767,8 +2767,8 @@
   const VarDecl &D = *emission.Variable;
   QualType type = D.getType();
 
-  bool HasByrefExtendedLayout;
-  Qualifiers::ObjCLifetime ByrefLifetime;
+  bool HasByrefExtendedLayout = false;
+  Qualifiers::ObjCLifetime ByrefLifetime = Qualifiers::OCL_None;
   bool ByRefHasLifetime =
 getContext().getByrefLifetime(type, ByrefLifetime, HasByrefExtendedLayout);
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: clang/lib/CodeGen/CGStmt.cpp:2490
+  continue;
+std::string::size_type position1 = Constraints.find("={eax}");
+if (position1 != std::string::npos) {

pengfei wrote:
> If `Clobber` is `edx` only, we shouldn't change `"={eax}"` to `"=&{eax}"`.
Yes! I'll update and add a test. THX for review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 319282.
FreddyYe added a comment.

If Clobber is edx only, don't change "={eax}" to "=&{eax}".


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGenCXX/ms-inline-asm-return.cpp

Index: clang/test/CodeGenCXX/ms-inline-asm-return.cpp
===
--- clang/test/CodeGenCXX/ms-inline-asm-return.cpp
+++ clang/test/CodeGenCXX/ms-inline-asm-return.cpp
@@ -13,7 +13,17 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_i64()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&A,{{.*}}"
+// CHECK: ret i64 %[[r]]
+
+long long f_i64_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i64 @f_i64_reverse()
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&A,{{.*}}"
 // CHECK: ret i64 %[[r]]
 
 int f_i32() {
@@ -23,7 +33,26 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_i32()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_reverse()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_edx() {
+  __asm {
+mov edx, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_edx()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1", "={eax},~{edx},{{.*}}"
 // CHECK: ret i32 %[[r]]
 
 short f_i16() {
@@ -33,7 +62,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i16 @f_i16()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i16:[^ ]*]] = trunc i32 %[[r]] to i16
 // CHECK: ret i16 %[[r_i16]]
 
@@ -44,7 +73,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i8 @f_i8()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: ret i8 %[[r_i8]]
 
@@ -55,7 +84,7 @@
   }
 }
 // CHECK-LABEL: define dso_local zeroext i1 @f_i1()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: store i8 %[[r_i8]], i8* %{{.*}}
 // CHECK: %[[r_i1:[^ ]*]] = load i1, i1* %{{.*}}
@@ -70,7 +99,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_s4()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "=&{eax},{{.*}}"
 // CHECK: store i32 %[[r]], i32* %{{.*}}
 // CHECK: %[[r_i32:[^ ]*]] = load i32, i32* %{{.*}}
 // CHECK: ret i32 %[[r_i32]]
@@ -85,7 +114,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_s8()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=&A,{{.*}}"
 // CHECK: store i64 %[[r]], i64* %{{.*}}
 // CHECK: %[[r_i64:[^ ]*]] = load i64, i64* %{{.*}}
 // CHECK: ret i64 %[[r_i64]]
@@ -96,5 +125,5 @@
   __asm xor eax, eax
 }
 // CHECK-LABEL: define dso_local i32 @main()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "={eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "=&{eax},{{.*}}"
 // CHECK: ret i32 %[[r]]
Index: clang/test/CodeGen/ms-inline-asm.c
===
--- clang/test/CodeGen/ms-inline-asm.c
+++ clang/test/CodeGen/ms-inline-asm.c
@@ -114,7 +114,7 @@
 // CHECK: call i32 asm sideeffect inteldialect
 // CHECK-SAME: mov eax, $2
 // CHECK-SAME: mov $0, eax
-// CHECK-SAME: "=*m,={eax},*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})
+// CHECK-SAME: "=*m,=&{eax},*m,~{dirflag},~{fpsr

[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 319284.
FreddyYe added a comment.

refine clang-format


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGenCXX/ms-inline-asm-return.cpp

Index: clang/test/CodeGenCXX/ms-inline-asm-return.cpp
===
--- clang/test/CodeGenCXX/ms-inline-asm-return.cpp
+++ clang/test/CodeGenCXX/ms-inline-asm-return.cpp
@@ -13,7 +13,17 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_i64()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&A,{{.*}}"
+// CHECK: ret i64 %[[r]]
+
+long long f_i64_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i64 @f_i64_reverse()
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&A,{{.*}}"
 // CHECK: ret i64 %[[r]]
 
 int f_i32() {
@@ -23,7 +33,26 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_i32()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_reverse()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_edx() {
+  __asm {
+mov edx, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_edx()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1", "={eax},~{edx},{{.*}}"
 // CHECK: ret i32 %[[r]]
 
 short f_i16() {
@@ -33,7 +62,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i16 @f_i16()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i16:[^ ]*]] = trunc i32 %[[r]] to i16
 // CHECK: ret i16 %[[r_i16]]
 
@@ -44,7 +73,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i8 @f_i8()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: ret i8 %[[r_i8]]
 
@@ -55,7 +84,7 @@
   }
 }
 // CHECK-LABEL: define dso_local zeroext i1 @f_i1()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: store i8 %[[r_i8]], i8* %{{.*}}
 // CHECK: %[[r_i1:[^ ]*]] = load i1, i1* %{{.*}}
@@ -70,7 +99,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_s4()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "=&{eax},{{.*}}"
 // CHECK: store i32 %[[r]], i32* %{{.*}}
 // CHECK: %[[r_i32:[^ ]*]] = load i32, i32* %{{.*}}
 // CHECK: ret i32 %[[r_i32]]
@@ -85,7 +114,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_s8()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=&A,{{.*}}"
 // CHECK: store i64 %[[r]], i64* %{{.*}}
 // CHECK: %[[r_i64:[^ ]*]] = load i64, i64* %{{.*}}
 // CHECK: ret i64 %[[r_i64]]
@@ -96,5 +125,5 @@
   __asm xor eax, eax
 }
 // CHECK-LABEL: define dso_local i32 @main()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "={eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "=&{eax},{{.*}}"
 // CHECK: ret i32 %[[r]]
Index: clang/test/CodeGen/ms-inline-asm.c
===
--- clang/test/CodeGen/ms-inline-asm.c
+++ clang/test/CodeGen/ms-inline-asm.c
@@ -114,7 +114,7 @@
 // CHECK: call i32 asm sideeffect inteldialect
 // CHECK-SAME: mov eax, $2
 // CHECK-SAME: mov $0, eax
-// CHECK-SAME: "=*m,={eax},*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})
+// CHECK-SAME: "=*m,=&{eax},*m,~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i32* %{{.*}})

[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-27 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1edb76cc91e7: [X86] merge "={eax}" and 
"~{eax}" into "=&eax" for MSInlineASM (authored by 
FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/test/CodeGen/ms-inline-asm.c
  clang/test/CodeGenCXX/ms-inline-asm-return.cpp

Index: clang/test/CodeGenCXX/ms-inline-asm-return.cpp
===
--- clang/test/CodeGenCXX/ms-inline-asm-return.cpp
+++ clang/test/CodeGenCXX/ms-inline-asm-return.cpp
@@ -13,7 +13,17 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_i64()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&A,{{.*}}"
+// CHECK: ret i64 %[[r]]
+
+long long f_i64_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i64 @f_i64_reverse()
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&A,{{.*}}"
 // CHECK: ret i64 %[[r]]
 
 int f_i32() {
@@ -23,7 +33,26 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_i32()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_reverse() {
+  __asm {
+mov edx, 1
+mov eax, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_reverse()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1\0A\09mov eax, $$1", "=&{eax},~{edx},{{.*}}"
+// CHECK: ret i32 %[[r]]
+
+int f_i32_edx() {
+  __asm {
+mov edx, 1
+  }
+}
+// CHECK-LABEL: define dso_local i32 @f_i32_edx()
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov edx, $$1", "={eax},~{edx},{{.*}}"
 // CHECK: ret i32 %[[r]]
 
 short f_i16() {
@@ -33,7 +62,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i16 @f_i16()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i16:[^ ]*]] = trunc i32 %[[r]] to i16
 // CHECK: ret i16 %[[r_i16]]
 
@@ -44,7 +73,7 @@
   }
 }
 // CHECK-LABEL: define dso_local signext i8 @f_i8()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: ret i8 %[[r_i8]]
 
@@ -55,7 +84,7 @@
   }
 }
 // CHECK-LABEL: define dso_local zeroext i1 @f_i1()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$1\0A\09mov edx, $$1", "=&{eax},~{edx},{{.*}}"
 // CHECK: %[[r_i8:[^ ]*]] = trunc i32 %[[r]] to i8
 // CHECK: store i8 %[[r_i8]], i8* %{{.*}}
 // CHECK: %[[r_i1:[^ ]*]] = load i1, i1* %{{.*}}
@@ -70,7 +99,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i32 @f_s4()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "={eax},~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "mov eax, $$16843009", "=&{eax},{{.*}}"
 // CHECK: store i32 %[[r]], i32* %{{.*}}
 // CHECK: %[[r_i32:[^ ]*]] = load i32, i32* %{{.*}}
 // CHECK: ret i32 %[[r_i32]]
@@ -85,7 +114,7 @@
   }
 }
 // CHECK-LABEL: define dso_local i64 @f_s8()
-// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=A,~{eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i64 asm sideeffect inteldialect "mov eax, $$16843009\0A\09mov edx, $$85", "=&A,{{.*}}"
 // CHECK: store i64 %[[r]], i64* %{{.*}}
 // CHECK: %[[r_i64:[^ ]*]] = load i64, i64* %{{.*}}
 // CHECK: ret i64 %[[r_i64]]
@@ -96,5 +125,5 @@
   __asm xor eax, eax
 }
 // CHECK-LABEL: define dso_local i32 @main()
-// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "={eax},{{.*}}"
+// CHECK: %[[r:[^ ]*]] = call i32 asm sideeffect inteldialect "xor eax, eax", "=&{eax},{{.*}}"
 // CHECK: ret i32 %[[r]]
Index: clang/test/CodeGen/ms-inline-asm.c
===
--- clang/test/CodeGen/ms-inline-asm.c
+++ clang/test/CodeGen/ms-inline-asm.c
@@ -114,7 +114,7 @@
 // CHECK: call i32 asm sideeffect inteldialect
 // CHECK-SAME: mov eax, $2
 // CHECK-SAME: mov $0, eax
-// CHECK-SAME: "=*m,={eax},*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %{{.*}}, i3

[PATCH] D110798: [NFC] Use CHECK-NEXT instead of CHECK-SAME in target-invalid-cpu-note.c

2021-09-29 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: s.egerton, simoncook, fedor.sergeev.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D110798

Files:
  clang/test/Misc/target-invalid-cpu-note.c

Index: clang/test/Misc/target-invalid-cpu-note.c
===
--- clang/test/Misc/target-invalid-cpu-note.c
+++ clang/test/Misc/target-invalid-cpu-note.c
@@ -1,211 +1,95 @@
 // RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM
 // ARM: error: unknown target CPU 'not-a-cpu'
-// ARM: note: valid target CPU values are:
-// ARM-SAME: arm8
+// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-x1, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift
 
 // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
 // AARCH64: error: unknown target CPU 'not-a-cpu'
-// AARCH64: note: valid target CPU values are:
-// AARCH64-SAME: cortex-a35,
+// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel
 
 // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
-// TUNE_AARCH64: note: valid target CPU values are:
-// TUNE_AARCH64-SAME: cortex-a35,
+// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'
-// X86: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3,
-// X86-SAME: i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3,
-// X86-SAME: pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott,
-// X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont,
-// X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge,
-// X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512,
-// X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3,
-// X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
-// X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
-// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3,
-// X86-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
+// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmer

[PATCH] D110798: [NFC] Use CHECK-NEXT instead of CHECK-SAME in target-invalid-cpu-note.c

2021-09-29 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

I found that the old approach cannot detect extra output appearing between two 
different CHECK-SAME lines, so I changed them to CHECK-NEXT. This introduces a 
formatting issue, but the old approach already violated clang-format anyway, 
so I would prefer CHECK-NEXT. WDYT?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110798/new/

https://reviews.llvm.org/D110798

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110798: [NFC] Use CHECK-NEXT instead of CHECK-SAME in target-invalid-cpu-note.c

2021-09-30 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: clang/test/Misc/target-invalid-cpu-note.c:33
-// X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell,
-// X86_64-SAME: atom, silvermont, slm, goldmont, goldmont-plus, tremont, 
nehalem, corei7, westmere,
-// X86_64-SAME: sandybridge, corei7-avx, ivybridge, core-avx-i, haswell,

I forgot to give an example, sorry. For example, if I delete the last 
target-cpu `bonnell,` here, this lit test still passes. Likewise, if I delete 
the first or the last target-cpu on any of the -SAME lines in this file, the 
test still passes. That is my concern. Fortunately, when I changed this file 
to -NEXT, no new failures appeared, which means no CPUs were missing between 
different -SAME lines in the old file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110798/new/

https://reviews.llvm.org/D110798

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110798: [NFC] Use CHECK-NEXT instead of CHECK-SAME in target-invalid-cpu-note.c

2021-09-30 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

> In principle I agree but did you have this failure mode actually happen?

No failure happens for now, but one may happen in the future if we continue to 
use -SAME. Please read the example I gave in my last comment.

> Not sure I like crazy long lines, but I see that -NEXT then -SAME would fall 
> to the same issue.



> Arm targets are just checking that we print *some* list of CPUs, others are 
> putting the full list. Which isn't great because if you add a new CPU it's 
> possible you'll not get a failure here. I looked for other tests that might 
> check the exact set of CPUs but this is the only one.

Yeah, I suppose this is the only test that checks the valid CPU list? Then I 
suppose I should add a check of the whole list for Arm targets.

> I think a reasonable compromise is to -NEXT the `note: valid target CPU 
> values are: ` then -SAME the rest. Check the last line 
> ends in `{{$}}`. That limits where extra stuff can sneak in and means you can 
> read the file and it's failure output more easily. (each -SAME line has 
> multiple CPUs on it so that limits how much can be missed)

Can you read the latest example I commented? I think you misunderstood the extra 
outputs I mentioned. Or, if I'm wrong, can you give an inline example?

> If you feel like adding the full CPU list to the Arm targets go ahead.

Good to know your thoughts! I'll do that.

> What does clang-format complain about? This is a test file so formatting is 
> less of a concern than being readable for maintainers and having useful 
> FileCheck output. Splitting the matches enables that.

Sorry, I hadn't realized the point you mention here; let's ignore that 
comment.

Thanks for your review. Helped a lot!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110798/new/

https://reviews.llvm.org/D110798

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

When __builtin_mul_overflow has the input combination (signed, signed,
unsigned*) and the arbitrary width is larger than 64 bits, it will also
generate i256 in the backend. Disallow it until we improve backend support.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107420

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/builtins-overflow.c


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support signed _ExtInt operands of more than 
128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support special combination operands (signed, 
signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,15 +328,35 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
-for (unsigned I = 0; I < 3; ++I) {
-  const auto Arg = TheCall->getArg(I);
-  // Third argument will be a pointer.
-  auto Ty = I < 2 ? Arg->getType() : Arg->getType()->getPointeeType();
-  if (Ty->isExtIntType() && Ty->isSignedIntegerType() &&
-  S.getASTContext().getIntWidth(Ty) > 128)
-return S.Diag(Arg->getBeginLoc(),
-  diag::err_overflow_builtin_ext_int_max_size)
-   << 128;
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultTy = TheCall->getArg(2)->getType()->getPointeeType();
+// Input compination below will also emit integer value larger than
+// 128 bits in backend, disallow same as above.
+if (LeftTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(LeftTy) > 64 &&
+RightTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(RightTy) > 64 &&
+!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) > 64) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_special_combination_max_size)
+ << 64;
+}
+else if ((LeftTy->isExtIntType() && LeftTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(LeftTy) > 128)) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
+}
+else if ((RightTy->isExtIntType() && RightTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(RightTy) > 128)) {
+  return S.Diag(TheCall->getArg(1)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
+}
+else if ((ResultTy->isExtIntType() && ResultTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(ResultTy) > 128)) {
+  return S.Diag(TheCall->getArg(2)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
 }
   }
 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8348,6 +8348,9 @@
 def err_overflow_builtin_ext_int_max_size : Error<
   "__builtin_mul_overflow does not support signed _ExtInt operands of more "
   "than %0 bits">;
+def err_overflow_builtin_special_combination_max_size : Error<
+  "__builtin_mul_overflow does not support special combination operands 
(signed, signed, unsigned*) "
+  "of more than %0 bits">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support signed _ExtInt operands of more than 128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-erro

[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 363950.
FreddyYe added a comment.

update commit message


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/builtins-overflow.c


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support signed _ExtInt operands of more than 
128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support special combination operands (signed, 
signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,15 +328,35 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
-for (unsigned I = 0; I < 3; ++I) {
-  const auto Arg = TheCall->getArg(I);
-  // Third argument will be a pointer.
-  auto Ty = I < 2 ? Arg->getType() : Arg->getType()->getPointeeType();
-  if (Ty->isExtIntType() && Ty->isSignedIntegerType() &&
-  S.getASTContext().getIntWidth(Ty) > 128)
-return S.Diag(Arg->getBeginLoc(),
-  diag::err_overflow_builtin_ext_int_max_size)
-   << 128;
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultTy = TheCall->getArg(2)->getType()->getPointeeType();
+// Input compination below will also emit integer value larger than
+// 128 bits in backend, disallow same as above.
+if (LeftTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(LeftTy) > 64 &&
+RightTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(RightTy) > 64 &&
+!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) > 64) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_special_combination_max_size)
+ << 64;
+}
+else if ((LeftTy->isExtIntType() && LeftTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(LeftTy) > 128)) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
+}
+else if ((RightTy->isExtIntType() && RightTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(RightTy) > 128)) {
+  return S.Diag(TheCall->getArg(1)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
+}
+else if ((ResultTy->isExtIntType() && ResultTy->isSignedIntegerType() &&
+  S.getASTContext().getIntWidth(ResultTy) > 128)) {
+  return S.Diag(TheCall->getArg(2)->getBeginLoc(),
+diag::err_overflow_builtin_ext_int_max_size) << 128;
 }
   }
 
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8348,6 +8348,9 @@
 def err_overflow_builtin_ext_int_max_size : Error<
   "__builtin_mul_overflow does not support signed _ExtInt operands of more "
   "than %0 bits">;
+def err_overflow_builtin_special_combination_max_size : Error<
+  "__builtin_mul_overflow does not support special combination operands 
(signed, signed, unsigned*) "
+  "of more than %0 bits">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support signed _ExtInt operands of more than 128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support special combination operands (signed, signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===

[PATCH] D107420: [WIP][sema] Disallow __builtin_mul_overflow under special condition.

2021-08-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

I just realized that LLVM has a gap with GCC due to the compiler-rt call. Check 
this example:
https://gcc.godbolt.org/z/8Pf7zErr7
I need to investigate further to understand why this gap exists. Please don't
review the code for now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107420: [WIP][sema] Disallow __builtin_mul_overflow under special condition.

2021-08-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 363991.
FreddyYe added a comment.

rebase and refactor clang-format and clang-tidy.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/builtins-overflow.c


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support signed _ExtInt operands of more than 
128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support special combination operands (signed, 
signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,6 +328,21 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultTy = TheCall->getArg(2)->getType()->getPointeeType();
+// Input compination below will also emit integer value larger than
+// 128 bits in backend, disallow same as above.
+if (LeftTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(LeftTy) > 64 &&
+RightTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(RightTy) > 64 &&
+!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) > 64) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_special_combination_max_size)
+ << 64;
+}
 for (unsigned I = 0; I < 3; ++I) {
   const auto Arg = TheCall->getArg(I);
   // Third argument will be a pointer.
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8348,6 +8348,9 @@
 def err_overflow_builtin_ext_int_max_size : Error<
   "__builtin_mul_overflow does not support signed _ExtInt operands of more "
   "than %0 bits">;
+def err_overflow_builtin_special_combination_max_size : Error<
+  "__builtin_mul_overflow does not support special combination operands 
(signed, signed, unsigned*) "
+  "of more than %0 bits">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support signed _ExtInt operands of more than 128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support special combination operands (signed, signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,6 +328,21 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultTy = TheCall->getArg(2)->getType()->getPointeeType();
+// Input compination below will also emit integer value larger than
+// 128 bits in backend, disallow same as above.
+if (LeftTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(LeftTy) > 64 &&
+RightTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(RightTy) > 64 &&
+!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) > 64) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_special_combination_max_size)
+ << 64;
+}
 for (unsigned I = 0; I < 3; ++I) {
   c

[PATCH] D107420: [WIP][sema] Disallow __builtin_mul_overflow under special condition.

2021-08-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 364154.
FreddyYe marked 2 inline comments as done.
FreddyYe added a comment.

Addressed comments. I'll fix the clang-format issues later. Please help review
the new condition and diagnostic.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/builtins-overflow.c


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support signed _ExtInt operands of more than 
128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support special combination operands (signed, 
signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,6 +328,20 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultPointeeTy = 
TheCall->getArg(2)->getType()->getPointeeType();
+// Input combination below will also emit an integer value larger than
+// 128 bits in the backend, disallow same as above.
+if (!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) >= 128 &&
+(LeftTy->isSignedIntegerType() || RightTy->isSignedIntegerType()) &&
+(S.getASTContext().getIntWidth(LeftTy) + 
S.getASTContext().getIntWidth(RightTy)) > 128 &&
+) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_special_combination_max_size)
+ << 127;
+}
 for (unsigned I = 0; I < 3; ++I) {
   const auto Arg = TheCall->getArg(I);
   // Third argument will be a pointer.
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8348,6 +8348,9 @@
 def err_overflow_builtin_ext_int_max_size : Error<
   "__builtin_mul_overflow does not support signed _ExtInt operands of more "
   "than %0 bits">;
+def err_overflow_builtin_special_combination_max_size : Error<
+  "__builtin_mul_overflow does not suport unsigned overflow check after 
convention "
+  "more than %0 bits">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,10 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support signed _ExtInt operands of more than 128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support special combination operands (signed, signed, unsigned*) of more than 64 bits}}
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,6 +328,20 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultPointeeTy = TheCall->getArg(2)->getType()->getPointeeType();
+// Input combination below will also emit an integer value larger than
+// 128 bits in the backend, disallow same as above.
+if (!ResultTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultTy) >= 128 &&
+(LeftTy->isSignedIntegerType() || RightTy->isSignedIntegerType()) &&
+(S.getASTContext().getIntWidth(LeftTy) + S.getASTContext().getIntWidth(RightTy)) > 128 &&
+) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_specia

[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

Addressed. Thanks for the review!




Comment at: clang/test/Sema/builtins-overflow.c:45
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support special combination operands (signed, 
signed, unsigned*) of more than 64 bits}}
+  }

aaron.ballman wrote:
> Yeah, this diagnostic really doesn't tell me what's going wrong with the code 
> or how to fix it. Do we basically want to prevent using larger-than-64-bit 
> argument types with mixed signs? Or are there other problematic circumstances?
Yes, let me try to refine it. I can explain in more detail what happens with
such an input combination.
According to GCC's definition of this builtin:
https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
'These built-in functions promote the first two operands into infinite 
precision signed type and perform multiply on those promoted operands. The 
result is then cast to the type the third pointer argument points to and stored 
there.' 

A signed integer has a smaller positive range than an unsigned integer of the
same width, and the compiler-rt API used to check overflow of 128-bit integer
multiplication (`__muloti4`) is implemented only in a signed version. So the
largest absolute value it can check is 2^127. When the result argument is 128
bits or wider, `__muloti4` cannot be used. We should prevent this situation for
now; otherwise the backend will crash, as the example shows.

I found that the input operands don't both need to be wider than 64 bits; it is
enough for the sum of their widths to exceed 128. I'll refine this in my patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added inline comments.



Comment at: clang/include/clang/Basic/DiagnosticSemaKinds.td:8351-8353
+def err_overflow_builtin_special_combination_max_size : Error<
+  "__builtin_mul_overflow does not suport unsigned overflow check after 
convention "
+  "more than %0 bits">;

The new diagnostic is here. I forgot to update the tests since I haven't
rebuilt completely.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-04 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 364349.
FreddyYe added a comment.

update lit test, clang-format, if condition, diagnostic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaChecking.cpp
  clang/test/Sema/builtins-overflow.c


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,22 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{__builtin_mul_overflow does not support signed _ExtInt operands of more than 
128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{when __builtin_mul_overflow's result argument points to unsigned 128 bit 
integer, pls make sure input operands won't produce positive (2^127)~(2^128-1) 
from two negative integer value. That's not supported yet.}}
+  }
+  {
+_ExtInt(65) x = 1;
+_ExtInt(64) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error 
{{when __builtin_mul_overflow's result argument points to unsigned 128 bit 
integer, pls make sure input operands won't produce positive (2^127)~(2^128-1) 
from two negative integer value. That's not supported yet.}}
+  }
+  {
+_ExtInt(64) x = 1;
+_ExtInt(64) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected ok
+  }
 }
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -328,6 +328,22 @@
   // Disallow signed ExtIntType args larger than 128 bits to mul function until
   // we improve backend support.
   if (BuiltinID == Builtin::BI__builtin_mul_overflow) {
+const auto LeftTy = TheCall->getArg(0)->getType();
+const auto RightTy = TheCall->getArg(1)->getType();
+const auto ResultPointeeTy =
+TheCall->getArg(2)->getType()->getPointeeType();
+// Input combination below will also emit an integer value larger than
+// 128 bits in the backend, disallow same as above.
+if (!ResultPointeeTy->isSignedIntegerType() &&
+S.getASTContext().getIntWidth(ResultPointeeTy) == 128 &&
+((LeftTy->isSignedIntegerType() ? S.getASTContext().getIntWidth(LeftTy)
+: 0) +
+ (RightTy->isSignedIntegerType()
+  ? S.getASTContext().getIntWidth(RightTy)
+  : 0)) > 128) {
+  return S.Diag(TheCall->getArg(0)->getBeginLoc(),
+diag::err_overflow_builtin_mul_special_condition_max_size);
+}
 for (unsigned I = 0; I < 3; ++I) {
   const auto Arg = TheCall->getArg(I);
   // Third argument will be a pointer.
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8348,6 +8348,10 @@
 def err_overflow_builtin_ext_int_max_size : Error<
   "__builtin_mul_overflow does not support signed _ExtInt operands of more "
   "than %0 bits">;
+def err_overflow_builtin_mul_special_condition_max_size : Error<
+  "when __builtin_mul_overflow's result argument points to unsigned 128 bit 
integer, pls make sure "
+  "input operands won't produce positive (2^127)~(2^128-1) from two negative 
integer value. That's "
+  "not supported yet.">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "


Index: clang/test/Sema/builtins-overflow.c
===
--- clang/test/Sema/builtins-overflow.c
+++ clang/test/Sema/builtins-overflow.c
@@ -38,4 +38,22 @@
 _ExtInt(129) result;
 _Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{__builtin_mul_overflow does not support signed _ExtInt operands of more than 128 bits}}
   }
+  {
+_ExtInt(128) x = 1;
+_ExtInt(128) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{when __builtin_mul_overflow's result argument points to unsigned 128 bit integer, pls make sure input operands won't produce positive (2^127)~(2^128-1) from two negative integer value. That's not supported yet.}}
+  }
+  {
+_ExtInt(65) x = 1;
+_ExtInt(64) y = 1;
+unsigned _ExtInt(128) result;
+_Bool status = __builtin_mul_overflow(x, y, &result); // expected-error {{when __builtin_mul_overflow's result argument points to unsigned 128 bit integer, pls make sure i

[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-05 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D107420#2927856 , @lebedev.ri 
wrote:

> I don't personally care, but i think this diag doesn't make sense.
> What is "backend"? Which one? All of them? What happens when one, but not all 
> of them supports it?
> What if i don't intend to codegen this into an assembly, but only want to 
> produce the IR?

Thanks for the comment. The root cause of being unable to support this is the
same as for the code added before (lines 328-329). I suppose we can view this
patch as a supplement to the old one?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107420: [sema] Disallow __builtin_mul_overflow under special condition.

2021-08-05 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D107420#2928975 , @craig.topper 
wrote:

> I put up a patch for a simple fix for this in the backend. 
> https://reviews.llvm.org/D107581  The generated code is not optimal, but 
> maybe better than frontend workarounds.

THX for fix! LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107420/new/

https://reviews.llvm.org/D107420

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107946: [X86] Reverse *_set_ph and *_setr_ph 's set order.

2021-08-11 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107946

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h


Index: clang/lib/Headers/avx512vlfp16intrin.h
===
--- clang/lib/Headers/avx512vlfp16intrin.h
+++ clang/lib/Headers/avx512vlfp16intrin.h
@@ -48,7 +48,7 @@
 static __inline __m128h __DEFAULT_FN_ATTRS128
 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
-  return (__m128h)(__v8hf){__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8};
+  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
 static __inline __m256h __DEFAULT_FN_ATTRS256
@@ -56,9 +56,9 @@
   _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
   _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
   _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
-  return (__m256h)(__v16hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,
-__h7,  __h8,  __h9,  __h10, __h11, __h12,
-__h13, __h14, __h15, __h16};
+  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
+__h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
 }
 
 #define _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8)
\
Index: clang/lib/Headers/avx512fp16intrin.h
===
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -82,11 +82,11 @@
   _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
   _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
   _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
-  return (__m512h)(__v32hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,  __h7,
-__h8,  __h9,  __h10, __h11, __h12, __h13, __h14,
-__h15, __h16, __h17, __h18, __h19, __h20, __h21,
-__h22, __h23, __h24, __h25, __h26, __h27, __h28,
-__h29, __h30, __h31, __h32};
+  return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
+__h25, __h24, __h23, __h22, __h21, __h20, __h19,
+__h18, __h17, __h16, __h15, __h14, __h13, __h12,
+__h11, __h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
 }
 
 #define _mm512_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8, __h9,   
\


Index: clang/lib/Headers/avx512vlfp16intrin.h
===
--- clang/lib/Headers/avx512vlfp16intrin.h
+++ clang/lib/Headers/avx512vlfp16intrin.h
@@ -48,7 +48,7 @@
 static __inline __m128h __DEFAULT_FN_ATTRS128
 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
-  return (__m128h)(__v8hf){__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8};
+  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
 static __inline __m256h __DEFAULT_FN_ATTRS256
@@ -56,9 +56,9 @@
   _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
   _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
   _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
-  return (__m256h)(__v16hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,
-__h7,  __h8,  __h9,  __h10, __h11, __h12,
-__h13, __h14, __h15, __h16};
+  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
+__h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
 }
 
 #define _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8)\
Index: clang/lib/Headers/avx512fp16intrin.h
===
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -82,11 +82,11 @@
   _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
   _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
   _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
-  return (__m512h)(__v32hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,  __h7,
-__h8,  __h9,  __h10, __h11, __h12, __h13, __h14,
-__h15, __h16, __h17, __h18, __h19, __h20, __h21,
-   

[PATCH] D107946: [X86] Reverse *_set_ph and *_setr_ph 's set order.

2021-08-12 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 365923.
FreddyYe added a comment.

address comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107946/new/

https://reviews.llvm.org/D107946

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h


Index: clang/lib/Headers/avx512vlfp16intrin.h
===
--- clang/lib/Headers/avx512vlfp16intrin.h
+++ clang/lib/Headers/avx512vlfp16intrin.h
@@ -48,7 +48,7 @@
 static __inline __m128h __DEFAULT_FN_ATTRS128
 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
-  return (__m128h)(__v8hf){__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8};
+  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
 static __inline __m256h __DEFAULT_FN_ATTRS256
@@ -56,19 +56,18 @@
   _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
   _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
   _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
-  return (__m256h)(__v16hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,
-__h7,  __h8,  __h9,  __h10, __h11, __h12,
-__h13, __h14, __h15, __h16};
+  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
+__h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
 }
 
-#define _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8)
\
-  _mm_set_ph((__h8), (__h7), (__h6), (__h5), (__h4), (__h3), (__h2), (__h1))
+#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8)
\
+  _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1))
 
-#define _mm256_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8, __h9,   
\
-   __h10, __h11, __h12, __h13, __h14, __h15, __h16)
\
-  _mm256_set_ph((__h16), (__h15), (__h14), (__h13), (__h12), (__h11), (__h10), 
\
-(__h9), (__h8), (__h7), (__h6), (__h5), (__h4), (__h3),
\
-(__h2), (__h1))
+#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, 
\
+   h14, h15, h16)  
\
+  _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8),   
\
+(h7), (h6), (h5), (h4), (h3), (h2), (h1))
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
   return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), 
(__m256i)__A);
Index: clang/lib/Headers/avx512fp16intrin.h
===
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -82,22 +82,20 @@
   _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
   _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
   _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
-  return (__m512h)(__v32hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,  __h7,
-__h8,  __h9,  __h10, __h11, __h12, __h13, __h14,
-__h15, __h16, __h17, __h18, __h19, __h20, __h21,
-__h22, __h23, __h24, __h25, __h26, __h27, __h28,
-__h29, __h30, __h31, __h32};
-}
-
-#define _mm512_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8, __h9,   
\
-   __h10, __h11, __h12, __h13, __h14, __h15, __h16, __h17, 
\
-   __h18, __h19, __h20, __h21, __h22, __h23, __h24, __h25, 
\
-   __h26, __h27, __h28, __h29, __h30, __h31, __h32)
\
-  _mm512_set_ph((__h32), (__h31), (__h30), (__h29), (__h28), (__h27), (__h26), 
\
-(__h25), (__h24), (__h23), (__h22), (__h21), (__h20), (__h19), 
\
-(__h18), (__h17), (__h16), (__h15), (__h14), (__h13), (__h12), 
\
-(__h11), (__h10), (__h9), (__h8), (__h7), (__h6), (__h5),  
\
-(__h4), (__h3), (__h2), (__h1))
+  return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
+__h25, __h24, __h23, __h22, __h21, __h20, __h19,
+__h18, __h17, __h16, __h15, __h14, __h13, __h12,
+__h11, __h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
+}
+
+#define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, 
\
+   h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24,  
\
+   h25, h26, h27, h28, h29, h30, h31, h32) 
\
+  _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), 
\
+ 

[PATCH] D107946: [X86] Reverse *_set_ph and *_setr_ph 's set order.

2021-08-12 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

THX for review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107946/new/

https://reviews.llvm.org/D107946

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107946: [X86] Reverse *_set_ph and *_setr_ph 's set order.

2021-08-12 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6c1468854d70: [X86] Reverse *_set_ph and *_setr_ph 's 
set order. (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107946/new/

https://reviews.llvm.org/D107946

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h


Index: clang/lib/Headers/avx512vlfp16intrin.h
===
--- clang/lib/Headers/avx512vlfp16intrin.h
+++ clang/lib/Headers/avx512vlfp16intrin.h
@@ -48,7 +48,7 @@
 static __inline __m128h __DEFAULT_FN_ATTRS128
 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
_Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
-  return (__m128h)(__v8hf){__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8};
+  return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
 }
 
 static __inline __m256h __DEFAULT_FN_ATTRS256
@@ -56,19 +56,18 @@
   _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
   _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
   _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
-  return (__m256h)(__v16hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,
-__h7,  __h8,  __h9,  __h10, __h11, __h12,
-__h13, __h14, __h15, __h16};
+  return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
+__h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
 }
 
-#define _mm_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8)
\
-  _mm_set_ph((__h8), (__h7), (__h6), (__h5), (__h4), (__h3), (__h2), (__h1))
+#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8)
\
+  _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1))
 
-#define _mm256_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8, __h9,   
\
-   __h10, __h11, __h12, __h13, __h14, __h15, __h16)
\
-  _mm256_set_ph((__h16), (__h15), (__h14), (__h13), (__h12), (__h11), (__h10), 
\
-(__h9), (__h8), (__h7), (__h6), (__h5), (__h4), (__h3),
\
-(__h2), (__h1))
+#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, 
\
+   h14, h15, h16)  
\
+  _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8),   
\
+(h7), (h6), (h5), (h4), (h3), (h2), (h1))
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
   return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), 
(__m256i)__A);
Index: clang/lib/Headers/avx512fp16intrin.h
===
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -82,22 +82,20 @@
   _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
   _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
   _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
-  return (__m512h)(__v32hf){__h1,  __h2,  __h3,  __h4,  __h5,  __h6,  __h7,
-__h8,  __h9,  __h10, __h11, __h12, __h13, __h14,
-__h15, __h16, __h17, __h18, __h19, __h20, __h21,
-__h22, __h23, __h24, __h25, __h26, __h27, __h28,
-__h29, __h30, __h31, __h32};
-}
-
-#define _mm512_setr_ph(__h1, __h2, __h3, __h4, __h5, __h6, __h7, __h8, __h9,   
\
-   __h10, __h11, __h12, __h13, __h14, __h15, __h16, __h17, 
\
-   __h18, __h19, __h20, __h21, __h22, __h23, __h24, __h25, 
\
-   __h26, __h27, __h28, __h29, __h30, __h31, __h32)
\
-  _mm512_set_ph((__h32), (__h31), (__h30), (__h29), (__h28), (__h27), (__h26), 
\
-(__h25), (__h24), (__h23), (__h22), (__h21), (__h20), (__h19), 
\
-(__h18), (__h17), (__h16), (__h15), (__h14), (__h13), (__h12), 
\
-(__h11), (__h10), (__h9), (__h8), (__h7), (__h6), (__h5),  
\
-(__h4), (__h3), (__h2), (__h1))
+  return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
+__h25, __h24, __h23, __h22, __h21, __h20, __h19,
+__h18, __h17, __h16, __h15, __h14, __h13, __h12,
+__h11, __h10, __h9,  __h8,  __h7,  __h6,  __h5,
+__h4,  __h3,  __h2,  __h1};
+}
+
+#define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, 
\
+   h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24,  
\
+   h25, h26, 

[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-10-28 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: jeroen.dobbelaere, pengfei.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

*_mul_pch is to align with *_mul_ps and *_mul_pd


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D112777

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h
  clang/test/CodeGen/X86/avx512fp16-builtins.c
  clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlfp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -2934,36 +2934,72 @@
   return _mm_fmul_pch(__A, __B);
 }
 
+__m128h test_mm_mul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mul_pch(__A, __B);
+}
+
 __m128h test_mm_mask_fmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   // CHECK-LABEL: @test_mm_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
   return _mm_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m128h test_mm_mask_mul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m128h test_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
   // CHECK-LABEL: @test_mm_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
   return _mm_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m128h test_mm_maskz_mul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_maskz_mul_pch(__U, __A, __B);
+}
+
 __m256h test_mm256_fmul_pch(__m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_fmul_pch(__A, __B);
 }
 
+__m256h test_mm256_mul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mul_pch(__A, __B);
+}
+
 __m256h test_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m256h test_mm256_mask_mul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m256h test_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m256h test_mm256_maskz_mul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_maskz_mul_pch(__U, __A, __B);
+}
+
 __m128h test_mm_fmadd_pch(__m128h __A, __m128h __B, __m128h __C) {
   // CHECK-LABEL: @test_mm_fmadd_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmadd.cph.128
Index: clang/test/CodeGen/X86/avx512fp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -4315,18 +4315,36 @@
   return _mm512_fmul_pch(__A, __B);
 }
 
+__m512h test_mm512_mul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mul_pch(__A, __B);
+}
+
 __m512h test_mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
   return _mm512_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m512h test_mm512_mask_mul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m512h test_mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
   return _mm512_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m512h test_mm512_maskz_mul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_maskz_mul_pch(__U, __A, __B);
+}
+
 __m512h test_mm512_fmul_round_pch(__m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_fmul_round_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
Index: clang/lib/Headers/avx512vlfp16intrin.h
===

[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D112777#3095847 , @pengfei wrote:

>> *_mul_pch is to align with *_mul_ps annd *_mul_pd
>
> And *_mul_ph?

Yes. Corrected in summary.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 383702.
FreddyYe added a comment.

clang-formats.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h
  clang/test/CodeGen/X86/avx512fp16-builtins.c
  clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlfp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -2934,36 +2934,72 @@
   return _mm_fmul_pch(__A, __B);
 }
 
+__m128h test_mm_mul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mul_pch(__A, __B);
+}
+
 __m128h test_mm_mask_fmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
   // CHECK-LABEL: @test_mm_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
   return _mm_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m128h test_mm_mask_mul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m128h test_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
   // CHECK-LABEL: @test_mm_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
   return _mm_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m128h test_mm_maskz_mul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_maskz_mul_pch(__U, __A, __B);
+}
+
 __m256h test_mm256_fmul_pch(__m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_fmul_pch(__A, __B);
 }
 
+__m256h test_mm256_mul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mul_pch(__A, __B);
+}
+
 __m256h test_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m256h test_mm256_mask_mul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m256h test_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
   // CHECK-LABEL: @test_mm256_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
   return _mm256_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m256h test_mm256_maskz_mul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_maskz_mul_pch(__U, __A, __B);
+}
+
 __m128h test_mm_fmadd_pch(__m128h __A, __m128h __B, __m128h __C) {
   // CHECK-LABEL: @test_mm_fmadd_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmadd.cph.128
Index: clang/test/CodeGen/X86/avx512fp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -4315,18 +4315,36 @@
   return _mm512_fmul_pch(__A, __B);
 }
 
+__m512h test_mm512_mul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mul_pch(__A, __B);
+}
+
 __m512h test_mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_mask_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
   return _mm512_mask_fmul_pch(__W, __U, __A, __B);
 }
 
+__m512h test_mm512_mask_mul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mask_mul_pch(__W, __U, __A, __B);
+}
+
 __m512h test_mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_maskz_fmul_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
   return _mm512_maskz_fmul_pch(__U, __A, __B);
 }
 
+__m512h test_mm512_maskz_mul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_maskz_mul_pch(__U, __A, __B);
+}
+
 __m512h test_mm512_fmul_round_pch(__m512h __A, __m512h __B) {
   // CHECK-LABEL: @test_mm512_fmul_round_pch
   // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
Index: clang/lib/Headers/avx512vlfp16intrin.h
===
--- clang/lib/Headers/avx512vlfp16intri

[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-11-07 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

Ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-11-07 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D112777#3114502 , @craig.topper 
wrote:

> Not directly related to this patch, but why is the suffix _pch and _sch when 
> the instruction names end in CPH and CSH? It kind of seems like the correct 
> intrinsic name would have been _mm_fmulc_ph.
>
> Why does the name here need to be aligned with mul_ps/pd? This a "complex" 
> multiply which is a different operation. Is gcc also going to add aliases?

I can answer the second question. The prefix "f" can be read as the mnemonic 
that distinguishes FMA intrinsics. The suffix "c" can be read as the mnemonic 
for "complex". So adding the "f" mnemonic to these multiply intrinsics is 
ambiguous. gcc will add aliases, too. The first question is also a good one, 
but for now I don't think it conflicts much with the old intrinsics.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-11-07 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

Thanks for the review. I found that I missed many intrinsics, including the 
_fcmul_* series. They need to be aliased with cmul_*. Please wait for an update.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-11-07 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D112777#3114566 , @craig.topper 
wrote:

> In D112777#3114560 , @FreddyYe 
> wrote:
>
>> In D112777#3114502 , @craig.topper 
>> wrote:
>>
>>> Not directly related to this patch, but why is the suffix _pch and _sch 
>>> when the instruction names end in CPH and CSH? It kind of seems like the 
>>> correct intrinsic name would have been _mm_fmulc_ph.
>>>
>>> Why does the name here need to be aligned with mul_ps/pd? This a "complex" 
>>> multiply which is a different operation. Is gcc also going to add aliases?
>>
>> I can answer the second question. The prefix "f" can be judged as the 
>> mnemonic to distinguish fma instrinsics. The suffix "c" can be judged as the 
>> mnemonic of "complex". So add "f" mnemonic in this multiply intrinsics is 
>> ambiguous. gcc will add aliases, too. The first question is also a good 
>> question. But for now, it's not very conflict to old intrinsics, I think.
>
> But the mnemonic for the instructions here do include an 'F'. While the 
> mulps/mulpd instruction names do not include an 'F'. So we're creating new 
> intrinsics that are further away from the mnemonics of the instructions. Is 
> your argument that the instruction mnemonics are wrong and shouldn't include 
> an 'F'?

Yes, our convention of naming intrinsics after the instruction names should be 
followed, too. So this patch only adds the aliases; it does not replace the 
existing intrinsics. Yes, in my opinion, it's wrong.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for *_fmul_pch intrinsics

2021-11-08 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 385443.
FreddyYe added a comment.

Address comments. Add aliases for 36 intrinsics in all.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h
  clang/test/CodeGen/X86/avx512fp16-builtins.c
  clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlfp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -3114,3 +3114,76 @@
   // CHECK: call nnan half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
   return _mm_reduce_max_ph(__W);
 }
+
+// tests below are for alias intrinsics.
+__m128h test_mm_mul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mul_pch(__A, __B);
+}
+
+__m128h test_mm_mask_mul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m128h test_mm_maskz_mul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_maskz_mul_pch(__U, __A, __B);
+}
+
+__m256h test_mm256_mul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mul_pch(__A, __B);
+}
+
+__m256h test_mm256_mask_mul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m256h test_mm256_maskz_mul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_maskz_mul_pch(__U, __A, __B);
+}
+
+__m128h test_mm_cmul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_cmul_pch(__A, __B);
+}
+
+__m128h test_mm_mask_cmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_mask_fcmul_pch(__W, __U, __A, __B);
+}
+
+__m128h test_mm_maskz_cmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_maskz_cmul_pch(__U, __A, __B);
+}
+
+__m256h test_mm256_cmul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_cmul_pch(__A, __B);
+}
+
+__m256h test_mm256_mask_cmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_mask_cmul_pch(__W, __U, __A, __B);
+}
+
+__m256h test_mm256_maskz_cmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_maskz_cmul_pch(__U, __A, __B);
+}
Index: clang/test/CodeGen/X86/avx512fp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -4482,3 +4482,147 @@
   // CHECK:  %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x half>
   return _mm512_permutexvar_ph(__A, __B);
 }
+
+// tests below are for alias intrinsics.
+__m512h test_mm512_mul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mul_pch(__A, __B);
+}
+
+__m512h test_mm512_mask_mul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m512h test_mm512_maskz_mul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_maskz_mul_pch(__U, __A, __B);
+}
+
+__m512h test_mm512_cmul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512
+  return _mm512_cmul_pch(__A, __B);
+}
+__m512h test_mm512_mask_cmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512
+  return _mm512_mask_cmul_pch(__W, __U, __A, __B);
+}
+
+__m512h test_mm512_maskz_cmul_pch(__mmask16 __U, __

[PATCH] D97832: [X86] Refine "Support -march=alderlake"

2021-03-02 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: pengfei, hiraditya.
FreddyYe requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97832

Files:
  clang/test/Preprocessor/predefined-arch-macros.c
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td

Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -784,17 +784,6 @@
   list SPRFeatures =
 !listconcat(ICXFeatures, SPRAdditionalFeatures);
 
-  // Alderlake
-  list ADLAdditionalFeatures = [FeatureAVXVNNI,
-  FeatureCLDEMOTE,
-  FeatureHRESET,
-  FeaturePTWRITE,
-  FeatureSERIALIZE,
-  FeatureWAITPKG];
-  list ADLTuning = SKLTuning;
-  list ADLFeatures =
-!listconcat(SKLFeatures, ADLAdditionalFeatures);
-
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCMPXCHG8B,
@@ -873,6 +862,31 @@
   list TRMFeatures =
 !listconcat(GLPFeatures, TRMAdditionalFeatures);
 
+// Alderlake
+  list ADLAdditionalFeatures = [FeatureSERIALIZE,
+  FeaturePCONFIG,
+  FeatureSHSTK,
+  FeatureWIDEKL,
+  FeatureINVPCID,
+  FeatureADX,
+  FeatureFMA,
+  FeatureVAES,
+  FeatureVPCLMULQDQ,
+  FeatureF16C,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureLZCNT,
+  FeatureAVXVNNI,
+  FeaturePKU,
+  FeatureHRESET,
+  FeatureCLDEMOTE,
+  FeatureMOVDIRI,
+  FeatureMOVDIR64B,
+  FeatureWAITPKG];
+  list ADLTuning = SKLTuning;
+  list ADLFeatures =
+!listconcat(TRMFeatures, ADLAdditionalFeatures);
+
   // Knights Landing
   list KNLFeatures = [FeatureX87,
 FeatureCMPXCHG8B,
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -205,9 +205,6 @@
 FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
 FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
 FeatureWAITPKG | FeatureAVXVNNI;
-constexpr FeatureBitset FeaturesAlderlake =
-FeaturesSkylakeClient | FeatureCLDEMOTE | FeatureHRESET | FeaturePTWRITE |
-FeatureSERIALIZE | FeatureWAITPKG | FeatureAVXVNNI;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -223,6 +220,12 @@
 FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX;
 constexpr FeatureBitset FeaturesTremont =
 FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI;
+constexpr FeatureBitset FeaturesAlderlake =
+FeaturesTremont | FeatureADX | FeatureBMI | FeatureBMI2 | FeatureF16C |
+FeatureFMA | FeatureINVPCID | FeatureLZCNT | FeaturePCONFIG | FeaturePKU |
+FeatureSERIALIZE | FeatureSHSTK | FeatureVAES | FeatureVPCLMULQDQ |
+FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG |
+FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL;
 
 // Geode Processor.
 constexpr FeatureBitset FeaturesGeode =
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1791,32 +1791,53 @@
 // CHECK_ADL_M32: #define __BMI__ 1
 // CHECK_ADL_M32: #define __CLDEMOTE__ 1
 // CHECK_ADL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ADL_M32: #define __CLWB__ 1
 // CHECK_ADL_M32: #define __F16C__ 1
 // CHECK_ADL_M32: #define __FMA__ 1
+// CHECK_ADL_M32: #define __FSGSBASE__ 1
+// CHECK_ADL_M32: #define __FXSR__ 1
+// CHECK_ADL_M32: #define __GFNI__ 1
 // CHECK_ADL_M32: #define __HRESET__ 1
 // CHECK_ADL_M32: #define __INVPCID__ 1
+// CHECK_ADL_M32: #define __KL__ 1
 // CHECK_ADL_M32

[PATCH] D97832: [X86] Refine "Support -march=alderlake"

2021-03-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 327691.
FreddyYe added a comment.

delete extra spaces. THX for review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97832/new/

https://reviews.llvm.org/D97832

Files:
  clang/test/Preprocessor/predefined-arch-macros.c
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td

Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -784,17 +784,6 @@
   list SPRFeatures =
 !listconcat(ICXFeatures, SPRAdditionalFeatures);
 
-  // Alderlake
-  list ADLAdditionalFeatures = [FeatureAVXVNNI,
-  FeatureCLDEMOTE,
-  FeatureHRESET,
-  FeaturePTWRITE,
-  FeatureSERIALIZE,
-  FeatureWAITPKG];
-  list ADLTuning = SKLTuning;
-  list ADLFeatures =
-!listconcat(SKLFeatures, ADLAdditionalFeatures);
-
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCMPXCHG8B,
@@ -873,6 +862,31 @@
   list TRMFeatures =
 !listconcat(GLPFeatures, TRMAdditionalFeatures);
 
+  // Alderlake
+  list ADLAdditionalFeatures = [FeatureSERIALIZE,
+  FeaturePCONFIG,
+  FeatureSHSTK,
+  FeatureWIDEKL,
+  FeatureINVPCID,
+  FeatureADX,
+  FeatureFMA,
+  FeatureVAES,
+  FeatureVPCLMULQDQ,
+  FeatureF16C,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureLZCNT,
+  FeatureAVXVNNI,
+  FeaturePKU,
+  FeatureHRESET,
+  FeatureCLDEMOTE,
+  FeatureMOVDIRI,
+  FeatureMOVDIR64B,
+  FeatureWAITPKG];
+  list ADLTuning = SKLTuning;
+  list ADLFeatures =
+!listconcat(TRMFeatures, ADLAdditionalFeatures);
+
   // Knights Landing
   list KNLFeatures = [FeatureX87,
 FeatureCMPXCHG8B,
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -205,9 +205,6 @@
 FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
 FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
 FeatureWAITPKG | FeatureAVXVNNI;
-constexpr FeatureBitset FeaturesAlderlake =
-FeaturesSkylakeClient | FeatureCLDEMOTE | FeatureHRESET | FeaturePTWRITE |
-FeatureSERIALIZE | FeatureWAITPKG | FeatureAVXVNNI;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -223,6 +220,12 @@
 FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX;
 constexpr FeatureBitset FeaturesTremont =
 FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI;
+constexpr FeatureBitset FeaturesAlderlake =
+FeaturesTremont | FeatureADX | FeatureBMI | FeatureBMI2 | FeatureF16C |
+FeatureFMA | FeatureINVPCID | FeatureLZCNT | FeaturePCONFIG | FeaturePKU |
+FeatureSERIALIZE | FeatureSHSTK | FeatureVAES | FeatureVPCLMULQDQ |
+FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG |
+FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL;
 
 // Geode Processor.
 constexpr FeatureBitset FeaturesGeode =
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1791,32 +1791,53 @@
 // CHECK_ADL_M32: #define __BMI__ 1
 // CHECK_ADL_M32: #define __CLDEMOTE__ 1
 // CHECK_ADL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ADL_M32: #define __CLWB__ 1
 // CHECK_ADL_M32: #define __F16C__ 1
 // CHECK_ADL_M32: #define __FMA__ 1
+// CHECK_ADL_M32: #define __FSGSBASE__ 1
+// CHECK_ADL_M32: #define __FXSR__ 1
+// CHECK_ADL_M32: #define __GFNI__ 1
 // CHECK_ADL_M32: #define __HRESET__ 1
 // CHECK_ADL_M32: #define __INVPCID__ 1
+// CHECK_ADL_M32: #define __KL__ 1
 // CHECK_ADL_M32: #define __LZCNT__ 1
 // CHECK_ADL

[PATCH] D97832: [X86] Refine "Support -march=alderlake"

2021-03-03 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

Thanks for the review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97832/new/

https://reviews.llvm.org/D97832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97832: [X86] Refine "Support -march=alderlake"

2021-03-07 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

No more comments?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97832/new/

https://reviews.llvm.org/D97832

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97832: [X86] Refine "Support -march=alderlake"

2021-03-07 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG5f9489b75405: [X86] Refine "Support 
-march=alderlake" (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97832/new/

https://reviews.llvm.org/D97832

Files:
  clang/test/Preprocessor/predefined-arch-macros.c
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td

Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -784,17 +784,6 @@
   list SPRFeatures =
 !listconcat(ICXFeatures, SPRAdditionalFeatures);
 
-  // Alderlake
-  list ADLAdditionalFeatures = [FeatureAVXVNNI,
-  FeatureCLDEMOTE,
-  FeatureHRESET,
-  FeaturePTWRITE,
-  FeatureSERIALIZE,
-  FeatureWAITPKG];
-  list ADLTuning = SKLTuning;
-  list ADLFeatures =
-!listconcat(SKLFeatures, ADLAdditionalFeatures);
-
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCMPXCHG8B,
@@ -873,6 +862,31 @@
   list TRMFeatures =
 !listconcat(GLPFeatures, TRMAdditionalFeatures);
 
+  // Alderlake
+  list ADLAdditionalFeatures = [FeatureSERIALIZE,
+  FeaturePCONFIG,
+  FeatureSHSTK,
+  FeatureWIDEKL,
+  FeatureINVPCID,
+  FeatureADX,
+  FeatureFMA,
+  FeatureVAES,
+  FeatureVPCLMULQDQ,
+  FeatureF16C,
+  FeatureBMI,
+  FeatureBMI2,
+  FeatureLZCNT,
+  FeatureAVXVNNI,
+  FeaturePKU,
+  FeatureHRESET,
+  FeatureCLDEMOTE,
+  FeatureMOVDIRI,
+  FeatureMOVDIR64B,
+  FeatureWAITPKG];
+  list ADLTuning = SKLTuning;
+  list ADLFeatures =
+!listconcat(TRMFeatures, ADLAdditionalFeatures);
+
   // Knights Landing
   list KNLFeatures = [FeatureX87,
 FeatureCMPXCHG8B,
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -205,9 +205,6 @@
 FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
 FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
 FeatureWAITPKG | FeatureAVXVNNI;
-constexpr FeatureBitset FeaturesAlderlake =
-FeaturesSkylakeClient | FeatureCLDEMOTE | FeatureHRESET | FeaturePTWRITE |
-FeatureSERIALIZE | FeatureWAITPKG | FeatureAVXVNNI;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -223,6 +220,12 @@
 FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX;
 constexpr FeatureBitset FeaturesTremont =
 FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI;
+constexpr FeatureBitset FeaturesAlderlake =
+FeaturesTremont | FeatureADX | FeatureBMI | FeatureBMI2 | FeatureF16C |
+FeatureFMA | FeatureINVPCID | FeatureLZCNT | FeaturePCONFIG | FeaturePKU |
+FeatureSERIALIZE | FeatureSHSTK | FeatureVAES | FeatureVPCLMULQDQ |
+FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG |
+FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL;
 
 // Geode Processor.
 constexpr FeatureBitset FeaturesGeode =
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1791,32 +1791,53 @@
 // CHECK_ADL_M32: #define __BMI__ 1
 // CHECK_ADL_M32: #define __CLDEMOTE__ 1
 // CHECK_ADL_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ADL_M32: #define __CLWB__ 1
 // CHECK_ADL_M32: #define __F16C__ 1
 // CHECK_ADL_M32: #define __FMA__ 1
+// CHECK_ADL_M32: #define __FSGSBASE__ 1
+// CHECK_ADL_M32: #define __FXSR__ 1
+// CHECK_ADL_M32: #define __GFNI__ 1
 // CHECK_ADL_M32: #define __HRESET__ 1
 // CHECK_ADL_M32: #define __INVPCID__ 1
+// CHECK_ADL_M32: #define _

[PATCH] D112777: [X86][FP16] add alias for f*mul_*ch intrinsics

2021-11-14 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

Ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D112777: [X86][FP16] add alias for f*mul_*ch intrinsics

2021-11-16 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG73c9cf820409: [X86][FP16] add alias for f*mul_*ch intrinsics 
(authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112777/new/

https://reviews.llvm.org/D112777

Files:
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h
  clang/test/CodeGen/X86/avx512fp16-builtins.c
  clang/test/CodeGen/X86/avx512vlfp16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlfp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlfp16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlfp16-builtins.c
@@ -3114,3 +3114,76 @@
   // CHECK: call nnan half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
   return _mm_reduce_max_ph(__W);
 }
+
+// tests below are for alias intrinsics.
+__m128h test_mm_mul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mul_pch(__A, __B);
+}
+
+__m128h test_mm_mask_mul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m128h test_mm_maskz_mul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.128
+  return _mm_maskz_mul_pch(__U, __A, __B);
+}
+
+__m256h test_mm256_mul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mul_pch(__A, __B);
+}
+
+__m256h test_mm256_mask_mul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m256h test_mm256_maskz_mul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.256
+  return _mm256_maskz_mul_pch(__U, __A, __B);
+}
+
+__m128h test_mm_cmul_pch(__m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_cmul_pch(__A, __B);
+}
+
+__m128h test_mm_mask_cmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_mask_fcmul_pch(__W, __U, __A, __B);
+}
+
+__m128h test_mm_maskz_cmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+  // CHECK-LABEL: @test_mm_maskz_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.128
+  return _mm_maskz_cmul_pch(__U, __A, __B);
+}
+
+__m256h test_mm256_cmul_pch(__m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_cmul_pch(__A, __B);
+}
+
+__m256h test_mm256_mask_cmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_mask_cmul_pch(__W, __U, __A, __B);
+}
+
+__m256h test_mm256_maskz_cmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+  // CHECK-LABEL: @test_mm256_maskz_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.256
+  return _mm256_maskz_cmul_pch(__U, __A, __B);
+}
Index: clang/test/CodeGen/X86/avx512fp16-builtins.c
===
--- clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -4482,3 +4482,147 @@
   // CHECK:  %{{.*}} = bitcast <32 x i16> %{{.*}} to <32 x half>
   return _mm512_permutexvar_ph(__A, __B);
 }
+
+// tests below are for alias intrinsics.
+__m512h test_mm512_mul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mul_pch(__A, __B);
+}
+
+__m512h test_mm512_mask_mul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_mask_mul_pch(__W, __U, __A, __B);
+}
+
+__m512h test_mm512_maskz_mul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_maskz_mul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfmul.cph.512
+  return _mm512_maskz_mul_pch(__U, __A, __B);
+}
+
+__m512h test_mm512_cmul_pch(__m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512
+  return _mm512_cmul_pch(__A, __B);
+}
+__m512h test_mm512_mask_cmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+  // CHECK-LABEL: @test_mm512_mask_cmul_pch
+  // CHECK: @llvm.x86.avx512fp16.mask.vfcmul.cph.512
+  return _mm512_mask_cmul_pch(__W, __U, __A, __B);
+}
+
+_

[PATCH] D114059: [X86] add 3 missing intrinsics: _mm_(mask/maskz)_cvtpbh_ps

2021-11-16 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added a subscriber: pengfei.
FreddyYe requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114059

Files:
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@
   return _mm_cvtness_sbh(A);
 }
 
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+  // CHECK-LABEL: @test_mm_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_cvtpbh_ps(A);
+}
+
 __m256 test_mm256_cvtpbh_ps(__m128bh A) {
   // CHECK-LABEL: @test_mm256_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@
   return _mm256_cvtpbh_ps(A);
 }
 
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_maskz_cvtpbh_ps(M, A);
+}
+
 __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@
   return _mm256_maskz_cvtpbh_ps(M, A);
 }
 
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
 __m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,6 +420,18 @@
   return __R[0];
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile 
+///
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from convertion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+  return _mm_castsi128_ps(
+  (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data.
 ///
 /// \headerfile 
@@ -432,6 +444,22 @@
   (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile 
+///
+/// \param __U
+///A 4-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from convertion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+  (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
 ///
 /// \headerfile 
@@ -448,6 +476,26 @@
   (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile 
+///
+/// \param __S
+///A 128-bit vector of [4 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+///A 4-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from convertion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
+  (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
+  16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using merging mask.
 ///
 /// \headerfile 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D114059: [X86] add 3 missing intrinsics: _mm_(mask/maskz)_cvtpbh_ps

2021-11-16 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 387845.
FreddyYe added a comment.

convertion -> conversion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114059/new/

https://reviews.llvm.org/D114059

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@
   return _mm_cvtness_sbh(A);
 }
 
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+  // CHECK-LABEL: @test_mm_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_cvtpbh_ps(A);
+}
+
 __m256 test_mm256_cvtpbh_ps(__m128bh A) {
   // CHECK-LABEL: @test_mm256_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@
   return _mm256_cvtpbh_ps(A);
 }
 
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_maskz_cvtpbh_ps(M, A);
+}
+
 __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@
   return _mm256_maskz_cvtpbh_ps(M, A);
 }
 
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
 __m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,18 +420,46 @@
   return __R[0];
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile 
+///
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+  return _mm_castsi128_ps(
+  (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data.
 ///
 /// \headerfile 
 ///
 /// \param __A
 ///A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
   (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile 
+///
+/// \param __U
+///A 4-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+  (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
 ///
 /// \headerfile 
@@ -441,13 +469,33 @@
 ///bit is not set.
 /// \param __A
 ///A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
   (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile 
+///
+/// \param __S
+///A 128-bit vector of [4 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+///A 4-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+

[PATCH] D114059: [X86] add 3 missing intrinsics: _mm_(mask/maskz)_cvtpbh_ps

2021-11-17 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGeb9dc0c78f97: [X86] add 3 missing intrinsics: 
_mm_(mask/maskz)_cvtpbh_ps (authored by FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D114059/new/

https://reviews.llvm.org/D114059

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c

Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@
   return _mm_cvtness_sbh(A);
 }
 
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+  // CHECK-LABEL: @test_mm_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_cvtpbh_ps(A);
+}
+
 __m256 test_mm256_cvtpbh_ps(__m128bh A) {
   // CHECK-LABEL: @test_mm256_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@
   return _mm256_cvtpbh_ps(A);
 }
 
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_maskz_cvtpbh_ps(M, A);
+}
+
 __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@
   return _mm256_maskz_cvtpbh_ps(M, A);
 }
 
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+  // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+  // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+  // CHECK: @llvm.x86.sse2.pslli.d
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+  // CHECK: ret <4 x float> %{{.*}}
+  return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
 __m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
   // CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
   // CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,18 +420,46 @@
   return __R[0];
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile 
+///
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+  return _mm_castsi128_ps(
+  (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data.
 ///
 /// \headerfile 
 ///
 /// \param __A
 ///A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
   (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile 
+///
+/// \param __U
+///A 4-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+///A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+  (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
 /// Convert Packed BF16 Data to Packed float Data using zeroing mask.
 ///
 /// \headerfile 
@@ -441,13 +469,33 @@
 ///bit is not set.
 /// \param __A
 ///A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
 static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
   return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
   (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
 }
 
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile 
+///
+/// \param __S
+///A 128-bit vector of [4 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+///A 4-bit mask. Ele

[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-18 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 2 inline comments as done.
FreddyYe added a comment.

In D135938#3864521 , @RKSimon wrote:

> Can you fix the MC + disasm test file names - drop att/intel and ensure you 
> test both syntaxes for 32 and 64 bits.
>
> Ideally the 32/64 bit names should be close to each other in a file list 
> (e.g. avx-vnni-int8-32.s + avx-vnni-int8-64.s ?)

I see your point about keeping the names "close to each other" and have updated 
the patch accordingly. I merged the Disasm tests, but I didn't merge the MC 
tests because that is not as convenient to do. See the latest update.

Do we need to rename old tests to follow this rule? Old tests: 
https://github.com/llvm/llvm-project/tree/main/llvm/test/MC/X86 and 
https://github.com/llvm/llvm-project/tree/main/llvm/test/MC/Disassembler/X86


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-18 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll:6
+
+declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+

craig.topper wrote:
> Are there tests for commuting?
Hi Craig,
Can you show an example where the source operands are commutable but none of 
them is the destination? I cannot figure out a good way to add such a test.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-18 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/test/CodeGen/X86/avxvnniint8-intrinsics.ll:6
+
+declare <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
+

craig.topper wrote:
> FreddyYe wrote:
> > craig.topper wrote:
> > > Are there tests for commuting?
> > Hi Craig,
> > Can you show an example of commutable for source operands but none of then 
> > are destination? I cannot figure out a good way to add such test.
> See stack_fold_vpdpwssd_commuted in stack-folding-int-avxvnni.ll
Got it. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135937: [X86] Support -march=raptorlake, meteorlake

2022-10-20 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added inline comments.



Comment at: compiler-rt/lib/builtins/cpu_model.c:110
   INTEL_COREI7_ROCKETLAKE,
+  ZHAOXIN_FAM7H_LUJIAZUI,
+  INTEL_COREI7_RAPTORLAKE,

skan wrote:
> typo ? (ZHAOXIN_FAM7H_LUJIAZUI)
I don't think so. It is there to stay aligned with libgcc for compatibility, 
following what Craig caught on the raptorlake patch. See 
https://github.com/gcc-mirror/gcc/blob/master/gcc/common/config/i386/i386-cpuinfo.h#L94

We know that GCC has also been modifying this list recently. I'll convert this 
patch to a draft so we can wait for their change to land and then align with it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135933: [X86] Add CMPCCXADD instructions.

2022-10-20 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/lib/Target/X86/X86.td:259
+"Support CMPCCXADD instructions",
+[FeatureAVX2]>;
 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",

craig.topper wrote:
> Why AVX2?
Removed.



Comment at: llvm/lib/Target/X86/X86InstrSSE.td:8118
 
+let Predicates = [HasCMPCCXADD, In64BitMode], Constraints = "$dstsrc2 = $dst" 
in
+multiclass CMPCCXADD_BASE Opc, string OpcodeStr> {

craig.topper wrote:
> craig.topper wrote:
> > This feels like it belongs somewhere other than X86InstrSSE.td since it's 
> > not vector related.
> Missing `Defs = [EFLAGS]` I think
Yes. Moved to llvm/lib/Target/X86/X86InstrCompiler.td



Comment at: llvm/lib/Target/X86/X86InstrSSE.td:8131
+
+defm CMPBEXADD : CMPCCXADD_BASE<0xe6, "cmpbexadd">;
+defm CMPBXADD  : CMPCCXADD_BASE<0xe2, "cmpbxadd">;

craig.topper wrote:
> Any possibility of doing this like how JCC_1, SETCCr, and CMOV32rr using an 
> immediate for the lower 4 bits of the opcode?
Yes. Changed so.



Comment at: llvm/lib/Target/X86/X86InstrSSE.td:8145-8146
+defm CMPPXADD : CMPCCXADD_BASE<0xea, "cmppxadd">;
+defm CMPSXADD : CMPCCXADD_BASE<0xe8, "cmpsxadd">;
+defm CMPZXADD : CMPCCXADD_BASE<0xe4, "cmpzxadd">;
+

craig.topper wrote:
> Should there be aliases for consistency with Jcc, Setcc, and cmovcc. To 
> support A, AE, GT, GE etc.?
Yes, changed so.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135933/new/

https://reviews.llvm.org/D135933

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-20 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 2 inline comments as done.
FreddyYe added a comment.

> It's not a priority, but if you are ever bored and want to do some cleaning 
> then it would help!

I see. Then we are on the same side. I'll clean it up if I have time after 
landing these patches.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135933: [X86] Add CMPCCXADD instructions.

2022-10-20 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 4 inline comments as done.
FreddyYe added inline comments.



Comment at: llvm/lib/Target/X86/X86InstrCompiler.td:1026
 
+let Predicates = [HasCMPCCXADD, In64BitMode], Defs = [EFLAGS],
+Constraints = "$dstsrc2 = $dst" in {

craig.topper wrote:
>  X86InstrCompiler.td is for pseudos and isCodeGenOnly=1 instructions. 
> Basically things only needed by CodeGen and not the assembler/disassembler.
I wanted to place it near "cmpxchg". What about llvm/lib/Target/X86/X86InstrInfo.td?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135933/new/

https://reviews.llvm.org/D135933

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-21 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 469529.
FreddyYe marked 3 inline comments as done.
FreddyYe added a comment.

Address comments. THX for review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/Target/X86/X86IntrinsicsInfo.h
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52luq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 26

[PATCH] D135933: [X86] Add CMPCCXADD instructions.

2022-10-23 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: clang/lib/Headers/cmpccxaddintrin.h:19-34
+  _CMPCCX_O,   /* Overflow.  */
+  _CMPCCX_NO,  /* No overflow.  */
+  _CMPCCX_B,   /* Below.  */
+  _CMPCCX_NB,  /* Not below.  */
+  _CMPCCX_Z,   /* Zero.  */
+  _CMPCCX_NZ,  /* Not zero.  */
+  _CMPCCX_BE,  /* Below or equal.  */

craig.topper wrote:
> skan wrote:
> > Could you use the same suffix for the condition code as 
> > `./llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h`? e.g
> > ```
> > NB->AE
> > Z->E
> > NZ->NE
> > NBE->A
> > ```
> > and so on.
> Probably should have both versions as aliases.
Yes, agreed; I'll add both.



Comment at: llvm/lib/Target/X86/X86InstrInfo.td:3035
+  "cmp${cond}xadd\t{$src3, $dst, $dstsrc1|$dstsrc1, $dst, $src3}",
+  [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc1, GR64:$dstsrc2, 
GR64:$src3, timm:$cond))]>,
+  VEX_4V, VEX_W, T8PD, Sched<[WriteXCHG]>;

craig.topper wrote:
> skan wrote:
> > `set GR64:$dst, EFLAGS ...`?
> That doesn't work unless X86cmpccxadd is declared as having two results.
I did a test. It won't affect this intrinsic's lowering. Since it would not be 
useful and is only for intrinsic lowering, I prefer not to add it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135933/new/

https://reviews.llvm.org/D135933

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135933: [X86] Add CMPCCXADD instructions.

2022-10-24 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp:1453-1455
+emitMemModRMByte(MI, CurOp + 1, getX86RegNum(MI.getOperand(0)), TSFlags,
+ HasREX, StartByte, OS, Fixups, STI, false);
+CurOp = SrcRegNum + 3; // skip VEX_V4 and CC

skan wrote:
> Minor suggestion
> 
> ```
> emitMemModRMByte(MI, ++CurOp, getX86RegNum(MI.getOperand(0)), TSFlags,
>  HasREX, StartByte, OS, Fixups, STI, false);
> CurOp = SrcRegNum + 2; // skip VEX_V4 and CC
> ```
> would be more clear b/c you use "skip VEX_V4 and CC" in the comments.
We cannot do `++` before emitMemModRMByte, so there is an implicit +1 for that, 
like in other cases.



Comment at: llvm/lib/Target/X86/X86InstrInfo.td:3027
+def CMPCCXADDmr32 : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst),
+  (ins GR32:$dstsrc2, i32mem:$dstsrc1, GR32:$src3, ccode:$cond),
+  "cmp${cond}xadd\t{$src3, $dst, $dstsrc1|$dstsrc1, $dst, $src3}",

skan wrote:
> Could you use "GR32:$dstsrc1, i32mem:$dstsrc2" instead?
OK, I'm fine with either. Then the line below should be reversed accordingly.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135933/new/

https://reviews.llvm.org/D135933

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 471014.
FreddyYe marked 4 inline comments as done.
FreddyYe added a comment.

Address comments and update. THX for review:

Added llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll and
llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
  llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+  

[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 471022.
FreddyYe added a comment.

Rebase.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
  llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52luq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]

[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: clang/lib/Headers/avxifmaintrin.h:36
+/// __m128i
+/// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+/// \endcode

pengfei wrote:
> Should we provide unified intrinsic `_mm_madd52hi_epu64` like AVXVNNI?
Good reminder. You mean adding them to the intrinsics guide, right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/include/llvm/Support/X86TargetParser.def:205
 X86_FEATURE   (AVX512FP16,  "avx512fp16")
+X86_FEATURE   (AVXVNNIINT8, "avxvnniint8")
 X86_FEATURE   (AVXVNNI, "avxvnni")

skan wrote:
> Move it after AVXVNNI to keep the dictionary order?
Better to refine this in another patch, since the list is already not ordered.



Comment at: llvm/lib/Target/X86/X86ISelLowering.h:592
+VPDPBSSD,
+VPDPBSSDS,
+

RKSimon wrote:
> Do we actually need these? Are you intending to add DAG combines for these?
Yes. A follow-up patch will support DAG combines, like for the older VNNI instructions. 
https://reviews.llvm.org/D116039


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135951: [X86][1/2] SUPPORT RAO-INT

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: clang/docs/ReleaseNotes.rst:553
 - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
+- Add support for ``RAO-INT`` instructions.
 

Add bullets for supported intrinsics.



Comment at: clang/test/CodeGen/X86/raoint-builtins.c:2
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown 
-target-feature +raoint \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression 
| FileCheck %s
+

32 bit test coverage.



Comment at: llvm/lib/Target/X86/X86InstrRAOINT.td:9
+//
+// This file describes the instructions that make up the Intel AMX instruction
+// set.

RAOINT


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135951/new/

https://reviews.llvm.org/D135951

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-26 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 471033.
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

Address comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avx512ifmavlintrin.h
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
  llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52luq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp +

[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-27 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 471350.
FreddyYe added a comment.

Rebase. THX for all of the review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avx512ifmavlintrin.h
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
  llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52luq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// C

[PATCH] D135932: [X86] Add AVX-IFMA instructions.

2022-10-27 Thread Freddy, Ye via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0e720e6adad1: [X86] Add AVX-IFMA instructions. (authored by 
FreddyYe).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avx512ifmavlintrin.h
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/CodeGen/X86/stack-folding-int-avx512ifma.ll
  llvm/test/CodeGen/X86/stack-folding-int-avxifma.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52l

[PATCH] D135930: [X86] Add AVX-NE-CONVERT instructions.

2022-10-28 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added inline comments.



Comment at: llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll:4
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown 
--show-mc-encoding -mattr=+avxneconvert | FileCheck %s --check-prefixes=X86
+
+define <4 x float> @test_int_x86_vbcstnebf162ps128(i8* %A) {

Need to add `+avx512bf16,+avx512vl` tests for the shared builtin intrinsic. I just 
found that it crashes because the new patterns for avx512bf16 are missing. I'll update ASAP.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135930/new/

https://reviews.llvm.org/D135930

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135930: [X86] Add AVX-NE-CONVERT instructions.

2022-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added inline comments.



Comment at: clang/lib/Headers/avx512vlbf16intrin.h:164
+#define _mm_cvtneps_pbh(A) \
+  ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A)))
 

pengfei wrote:
> pengfei wrote:
> > RKSimon wrote:
> > > Is there no way for __attribute__ to allow different attribute 
> > > permutations?
> > > 
> > > Also, can we keep the __builtin_ia32_cvtneps2bf16_128 naming convention?
> > > Is there no way for attribute to allow different attribute permutations?
> > 
> > We have discussed this problem with GCC folks. There are two problems here:
> > 1. Unlike builtins, function attributes are more generic. It may introduce 
> > a lot of checks between callers and callees. I had a research to limit it 
> > to `__always_inline__` functions only. However, Clang handles inlining in 
> > middle-end, we don't have such information in the front-end. Besides, we 
> > don't know how to merge different permutations if they are inlining to the 
> > same function.
> > 2. We don't know how to put the permutations into IR's function attributes. 
> > We need to preserve all permutations for inlining reference, but the 
> > backend needs a determine feature list rather than selective.
> It's better to use `__builtin_ia32_cvtneps2bf16_128`.
I think __builtin_ia32_vcvtneps2bf16128 is also a "right" name.

See __builtin_ia32_vfmaddsubph256, __builtin_ia32_minph256...

And I admit naming conventions of clang builtins as well as LLVM IR builtins 
are confusing right now.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135930/new/

https://reviews.llvm.org/D135930

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135937: [WIP][X86] Support -march=raptorlake, meteorlake

2022-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 471938.
FreddyYe marked an inline comment as done.
FreddyYe added a comment.

Rebase.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +54,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1499,6 +1499,10 @@
 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 def : ProcModel<"alderlake", AlderlakePModel,
 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"raptorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"meteorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
 
 // AMD CPUs.
 
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -370,6 +370,10 @@
   { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Raptorlake microarchitecture based processors.
+  { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Meteorlake microarchitecture based processors.
+  { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
   // Knights Mill processor.
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -820,6 +820,21 @@
   *Subtype = X86::INTEL_COREI7_ALDERLAKE;
   break;
 
+// Raptorlake:
+case 0xb7:
+  CPU = "raptorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_RAPTORLAKE;
+  break;
+
+// Meteorlake:
+case 0xb5:
+case 0xaa:
+case 0xac:
+  CPU = "meteorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_METEORLAKE;
+
 // Icelake Xeon:
 case 0x6a:
 case 0x6c:
Index: llvm/include/llvm/Support/X86TargetParser.h
===
--- llvm

[PATCH] D135930: [X86] Add AVX-NE-CONVERT instructions.

2022-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked an inline comment as done.
FreddyYe added inline comments.



Comment at: clang/lib/Headers/avx512vlbf16intrin.h:164
+#define _mm_cvtneps_pbh(A) \
+  ((__m128bh)__builtin_ia32_vcvtneps2bf16128((__v4sf)(A)))
 

pengfei wrote:
> FreddyYe wrote:
> > pengfei wrote:
> > > pengfei wrote:
> > > > RKSimon wrote:
> > > > > Is there no way for __attribute__ to allow different attribute 
> > > > > permutations?
> > > > > 
> > > > > Also, can we keep the __builtin_ia32_cvtneps2bf16_128 naming 
> > > > > convention?
> > > > > Is there no way for attribute to allow different attribute 
> > > > > permutations?
> > > > 
> > > > We have discussed this problem with GCC folks. There are two problems 
> > > > here:
> > > > 1. Unlike builtins, function attributes are more generic. It may 
> > > > introduce a lot of checks between callers and callees. I had a research 
> > > > to limit it to `__always_inline__` functions only. However, Clang 
> > > > handles inlining in middle-end, we don't have such information in the 
> > > > front-end. Besides, we don't know how to merge different permutations 
> > > > if they are inlining to the same function.
> > > > 2. We don't know how to put the permutations into IR's function 
> > > > attributes. We need to preserve all permutations for inlining 
> > > > reference, but the backend needs a determine feature list rather than 
> > > > selective.
> > > It's better to use `__builtin_ia32_cvtneps2bf16_128`.
> > I think __builtin_ia32_vcvtneps2bf16128 is also a "right" name.
> > 
> > See __builtin_ia32_vfmaddsubph256, __builtin_ia32_minph256...
> > 
> > And I admit naming conventions of clang builtins as well as LLVM IR 
> > builtins are confusing right now.
> The problem here is that `16128` is a bit confusing; a `_` breaks it into 2 numbers.
> But I don't insist on it :)
I tried that, but found that __builtin_ia32_cvtneps2bf16_256 already exists for 
avx512bf16 and is currently used for mask intrinsic lowering. How about not changing 
it this time? We can do a refinement patch later for the avx512bf16 builtins, since 
they also have some redundant FE/codegen logic for the 256/512 mask intrinsics.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135930/new/

https://reviews.llvm.org/D135930

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135937: [WIP][X86] Support -march=raptorlake, meteorlake

2022-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 472203.
FreddyYe marked 7 inline comments as done.
FreddyYe added a comment.

Address comments and update to align with gcc. See my latest comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +54,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1499,6 +1499,10 @@
 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 def : ProcModel<"alderlake", AlderlakePModel,
 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"raptorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"meteorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
 
 // AMD CPUs.
 
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -370,6 +370,10 @@
   { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Raptorlake microarchitecture based processors.
+  { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Meteorlake microarchitecture based processors.
+  { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
   // Knights Mill processor.
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -815,6 +815,12 @@
 // Alderlake:
 case 0x97:
 case 0x9a:
+// Raptorlake:
+case 0xb7:
+// Meteorlake:
+case 0xb5:
+case 0xaa:
+case 0xac:
   CPU = "alderlake";
   *Type = X86::INTEL_COREI7;
   *Subtype = X86::INTEL_COREI7_ALDERLAKE;
Index: llvm/include/llvm/Support/X86TargetParser.h
===
--- llvm/include/llvm/Support/X86TargetParser.h
+++ llvm/include/llvm/Support/X86TargetParser.h
@@ -104,6 +104,8 @@
   CK_Tigerlake,
   CK_SapphireRapids,
   CK_Alderlake,
+  CK_Raptorlake,
+  CK_Meteorlake,
   CK_KNL,
   CK_KNM,
   CK_Lakemont,
Index: llvm/docs/ReleaseNotes.rst
===

[PATCH] D135937: [X86] Support -march=raptorlake, meteorlake

2022-10-31 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

To save capacity in ProcessorSubtypes, gcc decided not to support some of the 
compiler features for these two cpus:

  __builtin_cpu_is("meteorlake")
  __attribute__((target("arch=raptorlake")))
  ... some others I don't know.

Updated to align with gcc first. Welcome opinions and review!




Comment at: clang/test/Preprocessor/predefined-arch-macros.c:2233
 
+// RUN: %clang -march=raptorlake -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \

RKSimon wrote:
> (pedantic) Probably better to put these after the alderlake tests so its 
> easier to find?
I merged them with the check-prefix.



Comment at: compiler-rt/lib/builtins/cpu_model.c:111
+  ZHAOXIN_FAM7H_LUJIAZUI,
+  INTEL_COREI7_RAPTORLAKE,
+  INTEL_COREI7_METEORLAKE,

skan wrote:
> I see. But if possible, could we split "ZHAOXIN_FAM7H_LUJIAZUI" to another 
> patch?
Related change is removed. See my latest comment.



Comment at: compiler-rt/lib/builtins/cpu_model.c:478
+  *Type = INTEL_COREI7;
+  *Subtype = INTEL_COREI7_METEORLAKE;
+

MaskRay wrote:
> fallthrough?
Good catch! However, the related changes have been removed; see my latest comment.



Comment at: llvm/lib/Support/Host.cpp:836
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_METEORLAKE;
+

MaskRay wrote:
> fallthrough?
Good catch!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135937: [X86] Support -march=raptorlake, meteorlake

2022-11-01 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

In D135937#3898228 , @FreddyYe wrote:

> To save capacity in ProcessorSubtypes, gcc decided not to support some of the 
> compiler features for these two cpus:
>
>   __builtin_cpu_is("meteorlake")
>   __attribute__((target("arch=raptorlake")))
>   ... some others I don't know.
>
> Gcc's related patch:
> raptorlake 
> 
>  and meteorlake 
> 
> Updated to align with gcc first. Welcome opinions and review!

My opinion is to align with gcc first: support only the basic -march=xxx for 
these two cpus, and support the features mentioned above in the future if 
needed. Moreover, these two cpus may have dedicated AdditionalTuning features 
in the future; we can add those at that time as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D137153: [X86] Support -march=sierraforest, grandridge, graniterapids.

2022-11-01 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: Enna1, pengfei, hiraditya.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D137153

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,9 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=sierraforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=grandridge 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=graniterapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +55,9 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sierraforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=grandridge 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=graniterapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -943,6 +943,14 @@
   list SPRFeatures =
 !listconcat(ICXFeatures, SPRAdditionalFeatures);
 
+  // Graniterapids
+  list GNRAdditionalFeatures = [FeatureAMXTILE,
+  FeatureAMXINT8,
+  FeatureAMXBF16,
+  FeatureBF16];
+  list GNRFeatures =
+!listconcat(SPRFeatures, GNRAdditionalFeatures);
+
   // Atom
   list AtomFeatures = [FeatureX87,
  FeatureCX8,
@@ -1050,6 +1058,19 @@
   list ADLFeatures =
 !listconcat(TRMFeatures, ADLAdditionalFeatures);
 
+  // Sierraforest
+  list SRFAdditionalFeatures = [FeatureCMPCCXADD,
+  FeatureAVXIFMA,
+  FeatureAVXNECONVERT,
+  FeatureAVXVNNIINT8];
+  list SRFFeatures =
+!listconcat(ADLFeatures, SRFAdditionalFeatures);
+
+  // Grandridge
+  list GRRAdditionalFeatures = [FeatureRAOINT];
+  list GRRFeatures =
+!listconcat(SRFFeatures, GRRAdditionalFeatures);
+
   // Knights Landing
   list KNLFeatures = [FeatureX87,
 FeatureCX8,
@@ -1502,6 +1523,12 @@
 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 def : ProcModel<"alderlake", AlderlakePModel,
 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"sierraforest", SLMModel, ProcessorFeatures.SRFFeatures,
+ProcessorFeatures.TRMTuning>;
+def : ProcModel<"grandridge", SLMModel, ProcessorFeatures.GRRFeatures,
+ 

[PATCH] D135932: Add AVX-IFMA instructions.

2022-10-13 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: pengfei, hiraditya.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

For more details about these instructions, please refer to the latest ISE 
document: 
https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/predefined-arch-macros-x86.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/Target/X86/X86IntrinsicsInfo.h
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-att.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-intel.txt
  llvm/test/MC/Disassembler/X86/x86-64-avx-ifma-att.txt
  llvm/test/MC/Disassembler/X86/x86-64-avx-ifma-intel.txt
  llvm/test/MC/X86/avx-ifma-att.s
  llvm/test/MC/X86/avx-ifma-intel.s
  llvm/test/MC/X86/x86-64-avx-ifma-att.s

Index: llvm/test/MC/X86/x86-64-avx-ifma-att.s
===
--- /dev/null
+++ llvm/test/MC/X86/x86-64-avx-ifma-att.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple=x86_64-unknown-unknown -mattr=+avxifma --show-encoding < %s  | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+ {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+ {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  291(%r8,%rax,4), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq  291(%r8,%rax,4), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  (%rip), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq  (%rip), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  -1024(,%rbp,2), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+ {vex} vpmadd52huq  -1024(,%rbp,2), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  4064(%rcx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+ {vex} vpmadd52huq  4064(%rcx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  -4096(%rdx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+ {vex} vpmadd52huq  -4096(%rdx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+ {vex} vpmadd52huq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  291(%r8,%rax,4), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+ {vex} vpmadd52huq  291(%r8,%rax,4), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  (%rip), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+ {vex} vpmadd52huq  (%rip), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  -512(,%rbp,2), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+ {vex} vpmadd52huq  -512(,%rbp,2), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  2032(%rcx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+ {vex} vpmadd52huq  2032(%rcx), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  -2048(%rdx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+ {vex} vpmadd52huq  -2048(%rdx), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+ {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+ {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
+
+/

[PATCH] D135936: [X86] Support -march=raptorlake

2022-10-13 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: Enna1, pengfei, hiraditya.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135936

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +53,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1420,6 +1420,8 @@
 ProcessorFeatures.GLPTuning>;
 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
 ProcessorFeatures.TRMTuning>;
+def : ProcModel<"raptorlake", SLMModel, ProcessorFeatures.ADLFeatures,
+ProcessorFeatures.TRMTuning>;
 
 // "Arrandale" along with corei3 and corei5
 foreach P = ["nehalem", "corei7"] in {
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -370,6 +370,8 @@
   { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Raptorlake microarchitecture based processors.
+  { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
   // Knights Mill processor.
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -820,6 +820,12 @@
   *Subtype = X86::INTEL_COREI7_ALDERLAKE;
   break;
 
+// Raptorlake:
+case 0xb7:
+  CPU = "raptorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_RAPTORLAKE;
+  break;
 // Icelake Xeon:
 case 0x6a:
 case 0x6c:
Index: llvm/include/llvm/Support/X86TargetParser.h
===
--- llvm/include/llvm/Support/X86TargetParser.h
+++ llvm/include/llvm/Support/X86TargetParser.h
@@ -104,6 +104,7 @@
   CK_Tigerlake,
   CK_SapphireRapids,
   CK_Alderlake,
+  CK_Raptorlake,
   CK_KNL,
   CK_KNM,
   CK_Lakemont,
Index: llvm/include/llvm/Support/X86TargetParser.def
===
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -89,6 +89,7 @@
 X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE,  "alderlake")
 X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3,"znver3")
 X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
+X86_CPU_SUBTYPE(INTEL_COREI7_RAPTORLAKE, 

[PATCH] D135937: [X86] Support -march=meteorlake

2022-10-13 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe created this revision.
Herald added subscribers: Enna1, pengfei, hiraditya.
Herald added a project: All.
FreddyYe requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135937

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +53,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1420,6 +1420,8 @@
 ProcessorFeatures.GLPTuning>;
 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
 ProcessorFeatures.TRMTuning>;
+def : ProcModel<"meteorlake", SLMModel, ProcessorFeatures.ADLFeatures,
+ProcessorFeatures.TRMTuning>;
 
 // "Arrandale" along with corei3 and corei5
 foreach P = ["nehalem", "corei7"] in {
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -370,6 +370,8 @@
   { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Meteorlake microarchitecture based processors.
+  { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
   // Knights Mill processor.
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -820,6 +820,14 @@
   *Subtype = X86::INTEL_COREI7_ALDERLAKE;
   break;
 
+// Meteorlake:
+case 0xb5:
+case 0xaa:
+case 0xac:
+  CPU = "meteorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_METEORLAKE;
+  break;
 // Icelake Xeon:
 case 0x6a:
 case 0x6c:
Index: llvm/include/llvm/Support/X86TargetParser.h
===
--- llvm/include/llvm/Support/X86TargetParser.h
+++ llvm/include/llvm/Support/X86TargetParser.h
@@ -104,6 +104,7 @@
   CK_Tigerlake,
   CK_SapphireRapids,
   CK_Alderlake,
+  CK_Meteorlake,
   CK_KNL,
   CK_KNM,
   CK_Lakemont,
Index: llvm/include/llvm/Support/X86TargetParser.def
===
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -89,6 +89,7 @@
 X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE,  "alderlake")
 X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3,"znver3")
 X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
+X86_CPU_S

[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-14 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe added a comment.

I won't have access to my machines for the next two days, so apologies in
advance for the delay in addressing the comments. I'll update next Monday.
Thanks for the review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135938: [X86] Add AVX-VNNI-INT8 instructions.

2022-10-17 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 4 inline comments as done.
FreddyYe added a comment.

THX for review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135938/new/

https://reviews.llvm.org/D135938

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135930: [X86] Add AVX-NE-CONVERT instructions.

2022-10-17 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 5 inline comments as done.
FreddyYe added a comment.

THX for reviews!




Comment at: clang/lib/Headers/immintrin.h:257
 
+/* FIXME: Change these When _Float16 type is supported */
+#if defined(__AVXNECONVERT__) && defined(__AVX512FP16__)

pengfei wrote:
> craig.topper wrote:
> > Is this FIXME still relevant? Don't we support _Float16 with SSE2 now?
> _Float16 is supported with SSE2, but maybe we need to move `__m128h`, 
> `__m256h` out of avx512fp16intrin.h
Yes. This is a redundant FIXME.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135930/new/

https://reviews.llvm.org/D135930

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135936: [X86] Support -march=raptorlake

2022-10-17 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe marked 4 inline comments as done.
FreddyYe added a comment.

THX for review! This patch has been merged into https://reviews.llvm.org/D135937, and the comments are addressed there.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135936/new/

https://reviews.llvm.org/D135936

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D135937: [X86] Support -march=meteorlake

2022-10-17 Thread Freddy, Ye via Phabricator via cfe-commits
FreddyYe updated this revision to Diff 468387.
FreddyYe added a comment.

Merge raptorlake patch and address comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135937/new/

https://reviews.llvm.org/D135937

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/attr-target-mv.c
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/test/CodeGen/X86/cpus-intel.ll

Index: llvm/test/CodeGen/X86/cpus-intel.ll
===
--- llvm/test/CodeGen/X86/cpus-intel.ll
+++ llvm/test/CodeGen/X86/cpus-intel.ll
@@ -17,6 +17,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -52,6 +54,8 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -1481,6 +1481,10 @@
 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
 def : ProcModel<"alderlake", AlderlakePModel,
 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"raptorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
+def : ProcModel<"meteorlake", AlderlakePModel,
+ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
 
 // AMD CPUs.
 
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -370,6 +370,10 @@
   { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Raptorlake microarchitecture based processors.
+  { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake },
+  // Meteorlake microarchitecture based processors.
+  { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
   // Knights Mill processor.
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -820,6 +820,21 @@
   *Subtype = X86::INTEL_COREI7_ALDERLAKE;
   break;
 
+// Raptorlake:
+case 0xb7:
+  CPU = "raptorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_RAPTORLAKE;
+  break;
+
+// Meteorlake:
+case 0xb5:
+case 0xaa:
+case 0xac:
+  CPU = "meteorlake";
+  *Type = X86::INTEL_COREI7;
+  *Subtype = X86::INTEL_COREI7_METEORLAKE;
+
 // Icelake Xeon:
 case 0x6a:
 case 0x6c:
Index: llvm/include/llvm/Support/X86TargetParser.h
===
--- llvm/inclu

  1   2   3   4   >