[PATCH] D60748: Fix i386 struct and union parameter alignment

2019-05-30 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei reopened this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

Reverted by https://reviews.llvm.org/rL362186


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D62367: [X86] VP2INTERSECT clang

2019-05-30 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL362196: [X86] Add VP2INTERSECT instructions (authored by 
pengfei, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D62367?vs=202359&id=202375#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62367/new/

https://reviews.llvm.org/D62367

Files:
  cfe/trunk/docs/ClangCommandLineReference.rst
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/include/clang/Driver/Options.td
  cfe/trunk/lib/Basic/Targets/X86.cpp
  cfe/trunk/lib/Basic/Targets/X86.h
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp
  cfe/trunk/lib/Headers/CMakeLists.txt
  cfe/trunk/lib/Headers/avx512vlvp2intersectintrin.h
  cfe/trunk/lib/Headers/avx512vp2intersectintrin.h
  cfe/trunk/lib/Headers/immintrin.h
  cfe/trunk/test/CodeGen/attr-target-x86.c
  cfe/trunk/test/CodeGen/intel-avx512vlvp2intersect.c
  cfe/trunk/test/CodeGen/intel-avx512vp2intersect.c
  cfe/trunk/test/Driver/x86-target-features.c
  cfe/trunk/test/Preprocessor/x86_target_features.c

Index: cfe/trunk/lib/Basic/Targets/X86.h
===
--- cfe/trunk/lib/Basic/Targets/X86.h
+++ cfe/trunk/lib/Basic/Targets/X86.h
@@ -78,6 +78,7 @@
   bool HasAVX512VBMI = false;
   bool HasAVX512VBMI2 = false;
   bool HasAVX512IFMA = false;
+  bool HasAVX512VP2INTERSECT = false;
   bool HasSHA = false;
   bool HasMPX = false;
   bool HasSHSTK = false;
Index: cfe/trunk/lib/Basic/Targets/X86.cpp
===
--- cfe/trunk/lib/Basic/Targets/X86.cpp
+++ cfe/trunk/lib/Basic/Targets/X86.cpp
@@ -524,6 +524,7 @@
 Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
 Features["avx512bitalg"] = Features["avx512vnni"] = false;
 Features["avx512vbmi2"] = Features["avx512bf16"] = false;
+Features["avx512vp2intersect"] = false;
 break;
   }
 }
@@ -774,6 +775,8 @@
   HasAVX512VBMI2 = true;
 } else if (Feature == "+avx512ifma") {
   HasAVX512IFMA = true;
+} else if (Feature == "+avx512vp2intersect") {
+  HasAVX512VP2INTERSECT = true;
 } else if (Feature == "+sha") {
   HasSHA = true;
 } else if (Feature == "+mpx") {
@@ -1166,7 +1169,8 @@
 Builder.defineMacro("__AVX512VBMI2__");
   if (HasAVX512IFMA)
 Builder.defineMacro("__AVX512IFMA__");
-
+  if (HasAVX512VP2INTERSECT)
+Builder.defineMacro("__AVX512VP2INTERSECT__");
   if (HasSHA)
 Builder.defineMacro("__SHA__");
 
@@ -1322,6 +1326,7 @@
   .Case("avx512vbmi", true)
   .Case("avx512vbmi2", true)
   .Case("avx512ifma", true)
+  .Case("avx512vp2intersect", true)
   .Case("bmi", true)
   .Case("bmi2", true)
   .Case("cldemote", true)
@@ -1401,6 +1406,7 @@
   .Case("avx512vbmi", HasAVX512VBMI)
   .Case("avx512vbmi2", HasAVX512VBMI2)
   .Case("avx512ifma", HasAVX512IFMA)
+  .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
   .Case("bmi", HasBMI)
   .Case("bmi2", HasBMI2)
   .Case("cldemote", HasCLDEMOTE)
Index: cfe/trunk/lib/Headers/avx512vlvp2intersectintrin.h
===
--- cfe/trunk/lib/Headers/avx512vlvp2intersectintrin.h
+++ cfe/trunk/lib/Headers/avx512vlvp2intersectintrin.h
@@ -0,0 +1,121 @@
+/*===-- avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics --===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===---===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlvp2intersectintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLVP2INTERSECT_H
+#define _AVX512VLVP2INTERSECT_H
+
+#define __DEFAULT_FN_ATTRS128 \
+  __attribute__((__always_inline__, __nodebug__,  __target__("avx512vl,avx512vp2intersect"), \
+ __min_vector_width__(128)))
+
+

[PATCH] D62282: [X86] Add ENQCMD intrinsics.

2019-06-06 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL362685: [X86] Add ENQCMD instructions (authored by pengfei, 
committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D62282?vs=202856&id=203301#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62282/new/

https://reviews.llvm.org/D62282

Files:
  cfe/trunk/docs/ClangCommandLineReference.rst
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/include/clang/Driver/Options.td
  cfe/trunk/lib/Basic/Targets/X86.cpp
  cfe/trunk/lib/Basic/Targets/X86.h
  cfe/trunk/lib/Headers/CMakeLists.txt
  cfe/trunk/lib/Headers/cpuid.h
  cfe/trunk/lib/Headers/enqcmdintrin.h
  cfe/trunk/lib/Headers/immintrin.h
  cfe/trunk/test/CodeGen/x86-enqcmd-builtins.c
  cfe/trunk/test/Driver/x86-target-features.c
  cfe/trunk/test/Preprocessor/x86_target_features.c

Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -1894,6 +1894,10 @@
 // INVPCID
 TARGET_BUILTIN(__builtin_ia32_invpcid, "vUiv*", "nc", "invpcid")
 
+// ENQCMD
+TARGET_BUILTIN(__builtin_ia32_enqcmd, "Ucv*vC*", "n", "enqcmd")
+TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd")
+
 // MSVC
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
Index: cfe/trunk/include/clang/Driver/Options.td
===
--- cfe/trunk/include/clang/Driver/Options.td
+++ cfe/trunk/include/clang/Driver/Options.td
@@ -2916,6 +2916,8 @@
 def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
 def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
 def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
+def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
+def mno_enqcmd : Flag<["-"], "mno-enqcmd">, Group<m_x86_Features_Group>;
 def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
 def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
 def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
Index: cfe/trunk/test/Preprocessor/x86_target_features.c
===
--- cfe/trunk/test/Preprocessor/x86_target_features.c
+++ cfe/trunk/test/Preprocessor/x86_target_features.c
@@ -468,3 +468,10 @@
 
 // NOVP2INTERSECT-NOT: #define __AVX512VP2INTERSECT__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=atom -menqcmd -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=ENQCMD %s
+
+// ENQCMD: #define __ENQCMD__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mno-enqcmd -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOENQCMD %s
+
+// NOENQCMD-NOT: #define __ENQCMD__ 1
Index: cfe/trunk/test/CodeGen/x86-enqcmd-builtins.c
===
--- cfe/trunk/test/CodeGen/x86-enqcmd-builtins.c
+++ cfe/trunk/test/CodeGen/x86-enqcmd-builtins.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple i386-unknown-unknown -target-feature +enqcmd -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple x86_64-unknown-unknown -target-feature +enqcmd -emit-llvm -o - | FileCheck %s
+
+#include <immintrin.h>
+
+int test_enqcmd(void *dst, const void *src) {
+// CHECK-LABEL: @test_enqcmd
+// CHECK: %[[TMP0:.+]] = call i8 @llvm.x86.enqcmd(i8* %{{.+}}, i8* %{{.+}})
+// CHECK: %[[RET:.+]] = zext i8 %[[TMP0]] to i32
+// CHECK: ret i32 %[[RET]]
+return _enqcmd(dst, src);
+}
+
+int test_enqcmds(void *dst, const void *src) {
+// CHECK-LABEL: @test_enqcmds
+// CHECK: %[[TMP0:.+]] = call i8 @llvm.x86.enqcmds(i8* %{{.+}}, i8* %{{.+}})
+// CHECK: %[[RET:.+]] = zext i8 %[[TMP0]] to i32
+// CHECK: ret i32 %[[RET]]
+return _enqcmds(dst, src);
+}
Index: cfe/trunk/test/Driver/x86-target-features.c
===
--- cfe/trunk/test/Driver/x86-target-features.c
+++ cfe/trunk/test/Driver/x86-target-features.c
@@ -188,3 +188,8 @@
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bf16 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX512BF16 %s
 // AVX512BF16: "-target-feature" "+avx512bf16"
 // NO-AVX512BF16: "-target-feature" "-avx512bf16"
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -menqcmd %s -### -o %t.o 2>&1 | FileCheck --check-prefix=ENQCMD %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-enqcmd %s -### -o %t.o 2>&1 | FileCheck --check-prefix=NO-ENQCMD %s
+// ENQCMD: "-target-feature" "+enqcmd"
+// NO-ENQCMD: "-target-feature" "-enqcmd"
Index: cfe/trunk/lib/Basic/Targets/X86.cpp
===
--- cfe/trunk/lib/Basic/Targets/X86.cpp
+++ cfe/trunk/lib/Basic/Targets/X86.cpp
@@ -835,6 +835,8 @@
   HasPTWRITE = true;
 } e

[PATCH] D62835: [X86] -march=cooperlake (clang)

2019-06-07 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL362781: [X86] -march=cooperlake (clang) (authored by 
pengfei, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D62835?vs=203329&id=203515#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62835/new/

https://reviews.llvm.org/D62835

Files:
  cfe/trunk/include/clang/Basic/X86Target.def
  cfe/trunk/lib/Basic/Targets/X86.cpp
  cfe/trunk/test/Driver/x86-march.c
  cfe/trunk/test/Misc/target-invalid-cpu-note.c
  cfe/trunk/test/Preprocessor/predefined-arch-macros.c

Index: cfe/trunk/lib/Basic/Targets/X86.cpp
===
--- cfe/trunk/lib/Basic/Targets/X86.cpp
+++ cfe/trunk/lib/Basic/Targets/X86.cpp
@@ -156,6 +156,13 @@
 setFeatureEnabledImpl(Features, "avx512vbmi", true);
 setFeatureEnabledImpl(Features, "sha", true);
 LLVM_FALLTHROUGH;
+  case CK_Cooperlake:
+// Cannonlake, IcelakeClient and IcelakeServer have no AVX512BF16 feature
+if (Kind != CK_Cannonlake && Kind != CK_IcelakeClient &&
+Kind != CK_IcelakeServer)
+  // CPX inherits all CLX features plus AVX512BF16
+  setFeatureEnabledImpl(Features, "avx512bf16", true);
+LLVM_FALLTHROUGH;
   case CK_Cascadelake:
 //Cannonlake has no VNNI feature inside while Icelake has
 if (Kind != CK_Cannonlake)
@@ -176,9 +183,9 @@
 setFeatureEnabledImpl(Features, "xsavec", true);
 setFeatureEnabledImpl(Features, "xsaves", true);
 setFeatureEnabledImpl(Features, "mpx", true);
-if (Kind != CK_SkylakeServer
-&& Kind != CK_Cascadelake)
-  // SKX/CLX inherits all SKL features, except SGX
+if (Kind != CK_SkylakeServer && Kind != CK_Cascadelake &&
+Kind != CK_Cooperlake)
+  // SKX/CLX/CPX inherits all SKL features, except SGX
   setFeatureEnabledImpl(Features, "sgx", true);
 setFeatureEnabledImpl(Features, "clflushopt", true);
 setFeatureEnabledImpl(Features, "aes", true);
@@ -981,6 +988,7 @@
   case CK_SkylakeClient:
   case CK_SkylakeServer:
   case CK_Cascadelake:
+  case CK_Cooperlake:
   case CK_Cannonlake:
   case CK_IcelakeClient:
   case CK_IcelakeServer:
Index: cfe/trunk/include/clang/Basic/X86Target.def
===
--- cfe/trunk/include/clang/Basic/X86Target.def
+++ cfe/trunk/include/clang/Basic/X86Target.def
@@ -157,6 +157,10 @@
 /// Cascadelake Server microarchitecture based processors.
 PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI)
 
+/// \name Cooperlake Server
+/// Cooperlake Server microarchitecture based processors.
+PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16)
+
 /// \name Cannonlake Client
 /// Cannonlake client microarchitecture based processors.
 PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI)
@@ -292,6 +296,7 @@
 FEATURE(FEATURE_VPCLMULQDQ)
 FEATURE(FEATURE_AVX512VNNI)
 FEATURE(FEATURE_AVX512BITALG)
+FEATURE(FEATURE_AVX512BF16)
 
 
 // FIXME: When commented out features are supported in LLVM, enable them here.
Index: cfe/trunk/test/Driver/x86-march.c
===
--- cfe/trunk/test/Driver/x86-march.c
+++ cfe/trunk/test/Driver/x86-march.c
@@ -52,6 +52,10 @@
 // RUN:   | FileCheck %s -check-prefix=cascadelake
 // cascadelake: "-target-cpu" "cascadelake"
 //
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=cooperlake 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=cooperlake
+// cooperlake: "-target-cpu" "cooperlake"
+//
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=knl 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=knl
 // knl: "-target-cpu" "knl"
Index: cfe/trunk/test/Misc/target-invalid-cpu-note.c
===
--- cfe/trunk/test/Misc/target-invalid-cpu-note.c
+++ cfe/trunk/test/Misc/target-invalid-cpu-note.c
@@ -16,7 +16,7 @@
 // X86-SAME: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont,
 // X86-SAME: nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge,
 // X86-SAME: core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512,
-// X86-SAME: skx, cascadelake, cannonlake, icelake-client, icelake-server, knl, knm, lakemont, k6, k6-2, k6-3,
+// X86-SAME: skx, cascadelake, cooperlake, cannonlake, icelake-client, icelake-server, knl, knm, lakemont, k6, k6-2, k6-3,
 // X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
 // X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
 // X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
@@ -27,7 +27,7 @@
 // X86_64: note: valid target CPU values are: nocona, core2, penryn, bonnell,
 // X86_64-SAME: atom, silvermont, slm, goldmont, gold

[PATCH] D62363: [X86] Enable intrinsics that convert float and bf16 data to each other

2019-06-10 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL363018: [X86] Enable intrinsics that convert float and bf16 
data to each other (authored by pengfei, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D62363?vs=203762&id=203950#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62363/new/

https://reviews.llvm.org/D62363

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp
  cfe/trunk/lib/Headers/avx512bf16intrin.h
  cfe/trunk/lib/Headers/avx512vlbf16intrin.h
  cfe/trunk/test/CodeGen/avx512bf16-builtins.c
  cfe/trunk/test/CodeGen/avx512vlbf16-builtins.c

Index: cfe/trunk/lib/Headers/avx512vlbf16intrin.h
===
--- cfe/trunk/lib/Headers/avx512vlbf16intrin.h
+++ cfe/trunk/lib/Headers/avx512vlbf16intrin.h
@@ -403,6 +403,71 @@
 (__v8sf)_mm256_setzero_si256());
 }
 
+/// Convert One Single float Data to One BF16 Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
+///
+/// \param __A
+///A float data.
+/// \returns A bf16 data whose sign field and exponent field keep unchanged,
+///and fraction field is truncated to 7 bits.
+static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
+  __v4sf __V = {__A, 0, 0, 0};
+  __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
+  (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+  return __R[0];
+}
+
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __A
+///A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
+  return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
+  (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __U
+///An 8-bit mask. Elements are zeroed out when the corresponding mask
+///bit is not set.
+/// \param __A
+///A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+  return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
+  (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __S
+///A 256-bit vector of [8 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+///An 8-bit mask. Elements are converted from __A when the corresponding
+///mask bit is set, and copied from __S otherwise.
+/// \param __A
+///A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] resulting from the conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
+  return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
+  (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A),
+  16));
+}
+
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
 
Index: cfe/trunk/lib/Headers/avx512bf16intrin.h
===
--- cfe/trunk/lib/Headers/avx512bf16intrin.h
+++ cfe/trunk/lib/Headers/avx512bf16intrin.h
@@ -15,10 +15,27 @@
 
 typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
+typedef unsigned short __bfloat16;
 
 #define __DEFAULT_FN_ATTRS512 \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
  __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))
+
+/// Convert One BF16 Data to One Single Float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __A
+///A bfloat data.
+/// \returns A float data whose sign field and exponent field keep unchanged,
+///and fraction field is extended to 23 bits.
+static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) {
+  return __builtin_ia32_cvtsbf162ss_32(__A);
+}
 
 /// Convert Two Packed Single Data to One Packed BF16 Data.
 ///
@@ -209,6 +226,54 @@
(__v16sf)_mm512_setzero_si512());
 }
 
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __A
+///A 256-bit vector of [16 x bfloat].
+/// \returns A 512

[PATCH] D59744: Fix i386 ABI "__m64" type bug

2019-06-11 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL363116: [X86] [ABI] Fix i386 ABI "__m64" type bug 
(authored by pengfei, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D59744?vs=204197&id=204203#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59744/new/

https://reviews.llvm.org/D59744

Files:
  cfe/trunk/lib/CodeGen/TargetInfo.cpp
  cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
  cfe/trunk/test/CodeGen/x86_32-m64.c

Index: cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
===
--- cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
+++ cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
@@ -3,7 +3,7 @@
 
 // CHECK-LABEL: define void @f56(
 // CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
+// CHECK: x86_mmx %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
 // CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4,
 // CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4,
 // CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4,
@@ -12,7 +12,7 @@
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
+// CHECK: x86_mmx %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
 // CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
 // CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
Index: cfe/trunk/test/CodeGen/x86_32-m64.c
===
--- cfe/trunk/test/CodeGen/x86_32-m64.c
+++ cfe/trunk/test/CodeGen/x86_32-m64.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-linux-gnu -target-cpu pentium4 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-netbsd -target-cpu pentium4 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NETBSD
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-apple-darwin9 -target-cpu yonah -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,DARWIN
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-elfiamcu -mfloat-abi soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,IAMCU
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+
+#include <mmintrin.h>
+__m64 m64;
+void callee(__m64 __m1, __m64 __m2);
+__m64 caller(__m64 __m1, __m64 __m2)
+{
+// LINUX-LABEL: define x86_mmx @caller(x86_mmx %__m1.coerce, x86_mmx %__m2.coerce)
+// LINUX: tail call void @callee(x86_mmx %__m2.coerce, x86_mmx %__m1.coerce)
+// LINUX: ret x86_mmx
+// NETBSD-LABEL: define x86_mmx @caller(x86_mmx %__m1.coerce, x86_mmx %__m2.coerce)
+// NETBSD: tail call void @callee(x86_mmx %__m2.coerce, x86_mmx %__m1.coerce)
+// NETBSD: ret x86_mmx
+// DARWIN-LABEL: define i64 @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// DARWIN: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// DARWIN: ret i64
+// IAMCU-LABEL: define <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// IAMCU: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// IAMCU: ret <1 x i64>
+// WIN32-LABEL: define dso_local <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// WIN32: call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// WIN32: ret <1 x i64>
+  callee(__m2, __m1);
+  return m64;
+}
Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp
===
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp
@@ -915,14 +915,6 @@
: ABIArgInfo::getDirect());
 }
 
-/// IsX86_MMXType - Return true if this is an MMX type.
-bool IsX86_MMXType(llvm::Type *IRType) {
-  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
-  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
-cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
-IRType->getScalarSizeInBits() != 64;
-}
-
 static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
   StringRef Constraint,
   llvm::Type* Ty) {
@@ -1011,6 +1003,7 @@
   bool IsSoftFloatABI;
   bool IsMCUABI;
   unsigned DefaultNumRegisterParameters;
+  bool IsMMXEnabled;
 
   static bool isRegisterSize(unsigned Size) {
 return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
@@ -1070,13 +1063,15 @@
 
   X86_32ABIInfo(CodeGen::CodeGenTypes 

[PATCH] D64389: [NFC] [X86] Fix scan-build complaining

2019-07-09 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: RKSimon, xiangzhangllvm, craig.topper.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Remove unused variable. This fixes bug:
https://bugs.llvm.org/show_bug.cgi?id=42526

Signed-off-by: pengfei 


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D64389

Files:
  clang/lib/CodeGen/CGBuiltin.cpp


Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -11755,12 +11755,11 @@
 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
 Value *Result = Builder.CreateExtractValue(Call, 0);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]);
+Builder.CreateDefaultAlignedStore(Result, Ops[2]);
 
 Result = Builder.CreateExtractValue(Call, 1);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]);
-return Store;
+return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
   }
 
   case X86::BI__builtin_ia32_vpmultishiftqb128:


Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -11755,12 +11755,11 @@
 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
 Value *Result = Builder.CreateExtractValue(Call, 0);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]);
+Builder.CreateDefaultAlignedStore(Result, Ops[2]);
 
 Result = Builder.CreateExtractValue(Call, 1);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]);
-return Store;
+return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
   }
 
   case X86::BI__builtin_ia32_vpmultishiftqb128:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64389: [NFC] [X86] Fix scan-build complaining

2019-07-09 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL365473: [NFC] [X86] Fix scan-build complaining (authored by 
pengfei, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D64389?vs=208605&id=208653#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64389/new/

https://reviews.llvm.org/D64389

Files:
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp


Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
===
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp
@@ -11776,12 +11776,11 @@
 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
 Value *Result = Builder.CreateExtractValue(Call, 0);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]);
+Builder.CreateDefaultAlignedStore(Result, Ops[2]);
 
 Result = Builder.CreateExtractValue(Call, 1);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]);
-return Store;
+return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
   }
 
   case X86::BI__builtin_ia32_vpmultishiftqb128:


Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
===
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp
@@ -11776,12 +11776,11 @@
 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
 Value *Result = Builder.CreateExtractValue(Call, 0);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]);
+Builder.CreateDefaultAlignedStore(Result, Ops[2]);
 
 Result = Builder.CreateExtractValue(Call, 1);
 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
-Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]);
-return Store;
+return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
   }
 
   case X86::BI__builtin_ia32_vpmultishiftqb128:
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D60748: Fix i386 struct and union parameter alignment

2019-05-29 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rC361934: [X86] Fix i386 struct and union parameter alignment 
(authored by pengfei, committed by ).

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60748/new/

https://reviews.llvm.org/D60748

Files:
  lib/CodeGen/TargetInfo.cpp
  test/CodeGen/x86_32-align-linux.c
  test/CodeGen/x86_32-arguments-linux.c

Index: lib/CodeGen/TargetInfo.cpp
===
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -1010,6 +1010,7 @@
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1076,6 +1077,7 @@
   IsWin32StructABI(Win32StructABI),
   IsSoftFloatABI(SoftFloatABI),
   IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+  IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
   DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1492,8 +1494,15 @@
   if (Align <= MinABIStackAlignInBytes)
 return 0; // Use default alignment.
 
-  // On non-Darwin, the stack type alignment is always 4.
-  if (!IsDarwinVectorABI) {
+  if (IsLinuxABI) {
+// i386 System V ABI 2.1: Structures and unions assume the alignment of their
+// most strictly aligned component.
+//
+// Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
+// want to spend any effort dealing with the ramifications of ABI breaks.
+return Align;
+  } else if (!IsDarwinVectorABI) {
+// On non-Darwin and non-Linux, the stack type alignment is always 4.
 // Set explicit alignment, since we may need to realign the top.
 return MinABIStackAlignInBytes;
   }
Index: test/CodeGen/x86_32-align-linux.c
===
--- test/CodeGen/x86_32-align-linux.c
+++ test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+typedef union {
+int d[4];
+__m128 m;
+} M128;
+
+extern void foo(int, ...);
+
+M128 a;
+
+// CHECK-LABEL: define void @test
+// CHECK: entry:
+// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval align 16
+// CHECK: call void (i32, ...) @foo(i32 1, <4 x float>
+
+void test(void)
+{
+  foo(1, a);
+  foo(1, a.m);
+}
+
Index: test/CodeGen/x86_32-arguments-linux.c
===
--- test/CodeGen/x86_32-arguments-linux.c
+++ test/CodeGen/x86_32-arguments-linux.c
@@ -3,21 +3,21 @@
 
 // CHECK-LABEL: define void @f56(
 // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4,
-// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4)
+// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 8 %a3,
+// CHECK: <1 x double> %a4, %struct.s56_2* byval align 8 %a5,
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 32 %a11,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval align 32 %a13)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}},
-// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 4 %{{[^ ]*}})
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 8 %{{[^ ]*}},
+// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 8 %{{[^ ]*}},
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 32 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 32 %{{[^ ]*}})
 // CHECK: }
 //
 //  [i386] clang misaligns long double in structures
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D56391: Limit COFF 'common' emission to <=32 alignment types.

2019-01-08 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM, thanks Erich.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D56391/new/

https://reviews.llvm.org/D56391



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D84225: [CFE] Add nomerge function attribute to inline assembly.

2020-07-21 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: zequanwu, rnk, asbirlea.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Sometimes we also want to avoid merging inline assembly. This patch adds
the nomerge function attribute to inline assembly.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84225

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/Sema/SemaStmtAttr.cpp
  clang/test/CodeGen/attr-nomerge.cpp


Index: clang/test/CodeGen/attr-nomerge.cpp
===
--- clang/test/CodeGen/attr-nomerge.cpp
+++ clang/test/CodeGen/attr-nomerge.cpp
@@ -10,6 +10,7 @@
   [[clang::nomerge]] f(bar(), bar());
   [[clang::nomerge]] [] { bar(); bar(); }(); // nomerge only applies to the 
anonymous function call
   [[clang::nomerge]] for (bar(); bar(); bar()) {}
+  [[clang::nomerge]] { asm("nop"); }
   bar();
 }
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR:[0-9]+]]
@@ -22,5 +23,7 @@
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
+// CHECK: call void asm {{.*}} #[[NOMERGEATTR2:[0-9]+]]
 // CHECK: call zeroext i1 @_Z3barv()
 // CHECK: attributes #[[NOMERGEATTR]] = { nomerge }
+// CHECK: attributes #[[NOMERGEATTR2]] = { nomerge nounwind }
Index: clang/lib/Sema/SemaStmtAttr.cpp
===
--- clang/lib/Sema/SemaStmtAttr.cpp
+++ clang/lib/Sema/SemaStmtAttr.cpp
@@ -183,6 +183,7 @@
   bool foundCallExpr() { return FoundCallExpr; }
 
   void VisitCallExpr(const CallExpr *E) { FoundCallExpr = true; }
+  void VisitAsmStmt(const AsmStmt *S) { FoundCallExpr = true; }
 
   void Visit(const Stmt *St) {
 if (!St)
Index: clang/lib/CodeGen/CGStmt.cpp
===
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -1954,12 +1954,16 @@
 }
 
 static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
-  bool ReadOnly, bool ReadNone, const AsmStmt &S,
+  bool ReadOnly, bool ReadNone, bool NoMerge,
+  const AsmStmt &S,
   const std::vector &ResultRegTypes,
   CodeGenFunction &CGF,
   std::vector &RegResults) {
   Result.addAttribute(llvm::AttributeList::FunctionIndex,
   llvm::Attribute::NoUnwind);
+  if (NoMerge)
+Result.addAttribute(llvm::AttributeList::FunctionIndex,
+llvm::Attribute::NoMerge);
   // Attach readnone and readonly attributes.
   if (!HasSideEffect) {
 if (ReadNone)
@@ -2334,12 +2338,14 @@
 Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
 EmitBlock(Fallthrough);
 UpdateAsmCallInst(cast(*Result), HasSideEffect, ReadOnly,
-  ReadNone, S, ResultRegTypes, *this, RegResults);
+  ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes,
+  *this, RegResults);
   } else {
 llvm::CallInst *Result =
 Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
 UpdateAsmCallInst(cast(*Result), HasSideEffect, ReadOnly,
-  ReadNone, S, ResultRegTypes, *this, RegResults);
+  ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes,
+  *this, RegResults);
   }
 
   assert(RegResults.size() == ResultRegTypes.size());


Index: clang/test/CodeGen/attr-nomerge.cpp
===
--- clang/test/CodeGen/attr-nomerge.cpp
+++ clang/test/CodeGen/attr-nomerge.cpp
@@ -10,6 +10,7 @@
   [[clang::nomerge]] f(bar(), bar());
   [[clang::nomerge]] [] { bar(); bar(); }(); // nomerge only applies to the anonymous function call
   [[clang::nomerge]] for (bar(); bar(); bar()) {}
+  [[clang::nomerge]] { asm("nop"); }
   bar();
 }
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR:[0-9]+]]
@@ -22,5 +23,7 @@
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
+// CHECK: call void asm {{.*}} #[[NOMERGEATTR2:[0-9]+]]
 // CHECK: call zeroext i1 @_Z3barv()
 // CHECK: attributes #[[NOMERGEATTR]] = { nomerge }
+// CHECK: attributes #[[NOMERGEATTR2]] = { nomerge nounwind }
Index: clang/lib/Sema/SemaStmtAttr.cpp
===
--- clang/lib/Sema/SemaStmtAttr.cpp
+++ clang/lib/Sema/SemaStmtAttr.cpp
@@ -183,6 +183,7 @@
   bool foundCallExpr() { return FoundCallExpr; }
 
   void VisitCallExpr(const CallExpr *E) { FoundCallExpr = true; }
+  void VisitAsmStmt(const AsmStmt *S) { FoundCallExpr = true; }
 
   void Visit(const Stmt *St) {
 if (!St)
Index: clang/lib/CodeGen/CGStmt.cpp
==

[PATCH] D84225: [CFE] Add nomerge function attribute to inline assembly.

2020-07-21 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG18581fd2c441: [CFE] Add nomerge function attribute to inline 
assembly. (authored by pengfei).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84225/new/

https://reviews.llvm.org/D84225

Files:
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/Sema/SemaStmtAttr.cpp
  clang/test/CodeGen/attr-nomerge.cpp


Index: clang/test/CodeGen/attr-nomerge.cpp
===
--- clang/test/CodeGen/attr-nomerge.cpp
+++ clang/test/CodeGen/attr-nomerge.cpp
@@ -10,6 +10,7 @@
   [[clang::nomerge]] f(bar(), bar());
   [[clang::nomerge]] [] { bar(); bar(); }(); // nomerge only applies to the 
anonymous function call
   [[clang::nomerge]] for (bar(); bar(); bar()) {}
+  [[clang::nomerge]] { asm("nop"); }
   bar();
 }
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR:[0-9]+]]
@@ -22,5 +23,7 @@
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
+// CHECK: call void asm {{.*}} #[[NOMERGEATTR2:[0-9]+]]
 // CHECK: call zeroext i1 @_Z3barv()
 // CHECK: attributes #[[NOMERGEATTR]] = { nomerge }
+// CHECK: attributes #[[NOMERGEATTR2]] = { nomerge nounwind }
Index: clang/lib/Sema/SemaStmtAttr.cpp
===
--- clang/lib/Sema/SemaStmtAttr.cpp
+++ clang/lib/Sema/SemaStmtAttr.cpp
@@ -183,6 +183,7 @@
   bool foundCallExpr() { return FoundCallExpr; }
 
   void VisitCallExpr(const CallExpr *E) { FoundCallExpr = true; }
+  void VisitAsmStmt(const AsmStmt *S) { FoundCallExpr = true; }
 
   void Visit(const Stmt *St) {
 if (!St)
Index: clang/lib/CodeGen/CGStmt.cpp
===
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -1954,12 +1954,16 @@
 }
 
 static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
-  bool ReadOnly, bool ReadNone, const AsmStmt &S,
+  bool ReadOnly, bool ReadNone, bool NoMerge,
+  const AsmStmt &S,
   const std::vector &ResultRegTypes,
   CodeGenFunction &CGF,
   std::vector &RegResults) {
   Result.addAttribute(llvm::AttributeList::FunctionIndex,
   llvm::Attribute::NoUnwind);
+  if (NoMerge)
+Result.addAttribute(llvm::AttributeList::FunctionIndex,
+llvm::Attribute::NoMerge);
   // Attach readnone and readonly attributes.
   if (!HasSideEffect) {
 if (ReadNone)
@@ -2334,12 +2338,14 @@
 Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
 EmitBlock(Fallthrough);
 UpdateAsmCallInst(cast(*Result), HasSideEffect, ReadOnly,
-  ReadNone, S, ResultRegTypes, *this, RegResults);
+  ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes,
+  *this, RegResults);
   } else {
 llvm::CallInst *Result =
 Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
 UpdateAsmCallInst(cast(*Result), HasSideEffect, ReadOnly,
-  ReadNone, S, ResultRegTypes, *this, RegResults);
+  ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes,
+  *this, RegResults);
   }
 
   assert(RegResults.size() == ResultRegTypes.size());


Index: clang/test/CodeGen/attr-nomerge.cpp
===
--- clang/test/CodeGen/attr-nomerge.cpp
+++ clang/test/CodeGen/attr-nomerge.cpp
@@ -10,6 +10,7 @@
   [[clang::nomerge]] f(bar(), bar());
   [[clang::nomerge]] [] { bar(); bar(); }(); // nomerge only applies to the anonymous function call
   [[clang::nomerge]] for (bar(); bar(); bar()) {}
+  [[clang::nomerge]] { asm("nop"); }
   bar();
 }
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR:[0-9]+]]
@@ -22,5 +23,7 @@
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
 // CHECK: call zeroext i1 @_Z3barv() #[[NOMERGEATTR]]
+// CHECK: call void asm {{.*}} #[[NOMERGEATTR2:[0-9]+]]
 // CHECK: call zeroext i1 @_Z3barv()
 // CHECK: attributes #[[NOMERGEATTR]] = { nomerge }
+// CHECK: attributes #[[NOMERGEATTR2]] = { nomerge nounwind }
Index: clang/lib/Sema/SemaStmtAttr.cpp
===
--- clang/lib/Sema/SemaStmtAttr.cpp
+++ clang/lib/Sema/SemaStmtAttr.cpp
@@ -183,6 +183,7 @@
   bool foundCallExpr() { return FoundCallExpr; }
 
   void VisitCallExpr(const CallExpr *E) { FoundCallExpr = true; }
+  void VisitAsmStmt(const AsmStmt *S) { FoundCallExpr = true; }
 
   void Visit(const Stmt *St) {
 if (!St)
Index: clang/lib/CodeGen/CGStmt.cpp
=

[PATCH] D85385: [X86][FPEnv] Teach X86 mask compare intrinsics to respect strict FP semantics.

2020-08-10 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Ping.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85385/new/

https://reviews.llvm.org/D85385

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D85385: [X86][FPEnv] Teach X86 mask compare intrinsics to respect strict FP semantics.

2020-08-10 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/CodeGen/avx512f-builtins-constrained-cmp.c:793
   // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_us
-  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x 
double> %{{.*}}, <8 x double> %{{.*}}, i32 31, i32 4)
-  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> 
%{{.*}}, <8 x double> %{{.*}}, i32 31, <8 x i1> {{.*}}, i32 4) #2
   return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US);

craig.topper wrote:
> I missed this in our internal review. All of these check lines check "#2" at 
> the end, do we need that? 
No. I forgot to remove them. Thank you.



Comment at: llvm/lib/IR/AutoUpgrade.cpp:3764
+
+if (NumElts < 8) {
+  int Indices[8];

craig.topper wrote:
> Can we use getX86MaskVec here?
Sure. It's cleaner now. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85385/new/

https://reviews.llvm.org/D85385

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106210: [MS] Preserve base register %esi around movs[bwl]

2021-07-23 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG91bc85b1ebaa: [MS] Preserve base register %esi around 
movs[bwl] (authored by namazso, committed by pengfei).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106210/new/

https://reviews.llvm.org/D106210

Files:
  clang/lib/Headers/intrin.h
  clang/test/CodeGen/ms-intrinsics.c


Index: clang/test/CodeGen/ms-intrinsics.c
===
--- clang/test/CodeGen/ms-intrinsics.c
+++ clang/test/CodeGen/ms-intrinsics.c
@@ -36,7 +36,7 @@
   return __movsb(Dest, Src, Count);
 }
 // CHECK-I386-LABEL: define{{.*}} void @test__movsb
-// CHECK-I386:   call { i8*, i8*, i32 } asm sideeffect "rep movsb", 
"={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %Dest, i8* 
%Src, i32 %Count)
+// CHECK-I386:   tail call { i8*, i8*, i32 } asm sideeffect "xchg %esi, 
$1\0Arep movsb\0Axchg %esi, $1", 
"={di},=r,={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %Dest, i8* 
%Src, i32 %Count)
 // CHECK-I386:   ret void
 // CHECK-I386: }
 
@@ -62,7 +62,7 @@
   return __movsw(Dest, Src, Count);
 }
 // CHECK-I386-LABEL: define{{.*}} void @test__movsw
-// CHECK-I386:   call { i16*, i16*, i32 } asm sideeffect "rep movsw", 
"={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i16* %Dest, 
i16* %Src, i32 %Count)
+// CHECK-I386:   tail call { i16*, i16*, i32 } asm sideeffect "xchg %esi, 
$1\0Arep movsw\0Axchg %esi, $1", 
"={di},=r,={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i16* %Dest, i16* 
%Src, i32 %Count)
 // CHECK-I386:   ret void
 // CHECK-I386: }
 
@@ -88,7 +88,7 @@
   return __movsd(Dest, Src, Count);
 }
 // CHECK-I386-LABEL: define{{.*}} void @test__movsd
-// CHECK-I386:   call { i32*, i32*, i32 } asm sideeffect "rep movsl", 
"={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Dest, 
i32* %Src, i32 %Count)
+// CHECK-I386:   tail call { i32*, i32*, i32 } asm sideeffect "xchg %esi, 
$1\0Arep movsl\0Axchg %esi, $1", 
"={di},=r,={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %Dest, i32* 
%Src, i32 %Count)
 // CHECK-I386:   ret void
 // CHECK-I386: }
 
Index: clang/lib/Headers/intrin.h
===
--- clang/lib/Headers/intrin.h
+++ clang/lib/Headers/intrin.h
@@ -451,24 +451,47 @@
 static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst,
   unsigned char const *__src,
   size_t __n) {
-  __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n)
-   : : "memory");
+#if defined(__x86_64__)
+  __asm__ __volatile__("rep movsb"
+   : "+D"(__dst), "+S"(__src), "+c"(__n)
+   :
+   : "memory");
+#else
+  __asm__ __volatile__("xchg %%esi, %1\nrep movsb\nxchg %%esi, %1"
+   : "+D"(__dst), "+r"(__src), "+c"(__n)
+   :
+   : "memory");
+#endif
 }
 static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst,
   unsigned long const *__src,
   size_t __n) {
+#if defined(__x86_64__)
   __asm__ __volatile__("rep movsl"
: "+D"(__dst), "+S"(__src), "+c"(__n)
:
: "memory");
+#else
+  __asm__ __volatile__("xchg %%esi, %1\nrep movsl\nxchg %%esi, %1"
+   : "+D"(__dst), "+r"(__src), "+c"(__n)
+   :
+   : "memory");
+#endif
 }
 static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst,
   unsigned short const *__src,
   size_t __n) {
+#if defined(__x86_64__)
   __asm__ __volatile__("rep movsw"
: "+D"(__dst), "+S"(__src), "+c"(__n)
:
: "memory");
+#else
+  __asm__ __volatile__("xchg %%esi, %1\nrep movsw\nxchg %%esi, %1"
+   : "+D"(__dst), "+r"(__src), "+c"(__n)
+   :
+   : "memory");
+#endif
 }
 static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst,
   unsigned long __x,


Index: clang/test/CodeGen/ms-intrinsics.c
===
--- clang/test/CodeGen/ms-intrinsics.c
+++ clang/test/CodeGen/ms-intrinsics.c
@@ -36,7 +36,7 @@
   return __movsb(Dest, Src, Count);
 }
 // CHECK-I386-LABEL: define{{.*}} void @test__movsb
-// CHECK-I386:   call { i8*, i8*, i32 } asm sideeffect "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %Dest, i8

[PATCH] D106210: [MS] Preserve base register %esi around movs[bwl]

2021-07-23 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Happened to see it. I thought Craig might miss it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106210/new/

https://reviews.llvm.org/D106210

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106790: prfchwintrin.h: Make _m_prefetchw take a pointer to volatile (PR49124)

2021-07-26 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/prfchwintrin.h:54
+#pragma clang diagnostic ignored "-Wcast-qual"
+  __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */);
+#pragma clang diagnostic pop

Can we instead declare `__builtin_prefetch` as taking a pointer to volatile in Builtins.def, e.g.:
```
BUILTIN(__builtin_prefetch, "vvCD*.", "nc")
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106790/new/

https://reviews.llvm.org/D106790

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106849: [NFC][X86] add missing tests in clang/test/CodeGen/attr-target-mv.c

2021-07-26 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106849/new/

https://reviews.llvm.org/D106849

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107082: [X86][RFC] Enable `_Float16` type support on X86 following the psABI

2021-07-29 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

I sent out this patch mainly as a PoC of the ABI changes; I'll fix the 
performance regressions in the next phase.
LLVM was using a different calling convention on x86 when passing and returning 
the half type. It conflicts with the current X86 psABI.
I have evaluated the risk internally and think the ABI change is low risk 
because Clang doesn't use that calling convention. But I may have overlooked 
something. Questions and comments are appreciated.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107082/new/

https://reviews.llvm.org/D107082

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107082: [X86][RFC] Enable `_Float16` type support on X86 following the psABI

2021-07-29 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/include/llvm/IR/RuntimeLibcalls.def:293-294
 HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
 HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
 HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee")
 HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2")

GCC 12 will provide the functions `__extendhfsf2` and `__truncsfhf2`. I wonder 
whether I can change the names directly here, or whether I need to do extra 
customization for ARM/AArch64. What about other targets?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107082/new/

https://reviews.llvm.org/D107082

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107082: [X86][RFC] Enable `_Float16` type support on X86 following the psABI

2021-07-29 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D107082#2913881, @craig.topper wrote:

> I haven't had a chance to look at this patch in detail, but I wanted to ask 
> if you considered doing what ARM and RISCV do for this. They pass the f16 in 
> the lower bits on an f32 by only changing the ABI handling code in the 
> backend. The type legalizer takes care of the rest. That seems simpler than 
> this patch. See for example https://reviews.llvm.org/D98670

Thanks Craig for the information. I referenced the implementation in AArch64. I 
think we have to add a legal f16 type in this way because:

1. We will support the `_Float16` type in Clang on SSE2 and above to keep the 
same behavior as GCC. So a legal type is a must.
2. Using the lower 16 bits of an f32 may not satisfy the requirements of the 
calling convention for aggregate and complex types defined by the psABI.
3. We have some optimizations that leverage the F16C or AVX512 ps2ph/ph2ps 
instructions. A legal type is easy to customize.

Besides, we have full arithmetic f16 support in AVX512FP16. Most of the code 
here is shared and serves both scenarios. We just need to promote most 
FP operations and expand or customize `FP_ROUND` and `FP_EXTEND` here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107082/new/

https://reviews.llvm.org/D107082

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D72820: Add pragma FP_CONTRACT support.

2020-01-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: craig.topper, andrew.w.kaylor, uweigand, RKSimon, 
LiuChen3.
Herald added subscribers: llvm-commits, cfe-commits, jdoerfert, hiraditya.
Herald added projects: clang, LLVM.

Support pragma FP_CONTRACT.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72820

Files:
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/constrained-math-builtins.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/CodeGen/ISDOpcodes.h
  llvm/include/llvm/IR/ConstrainedOps.def
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFMA.td
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -1,7 +1,339 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefixes=COMMON,NOFMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX1
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX512
+
+define float @f1(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f1:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f1:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %0
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(float %3, float %1, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f2(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f2:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorpd {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f2:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %0
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double %3, double %1, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f3(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f3:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f3:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %2
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(float %0, float %1, float %3,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f4(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f4:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorpd {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f4:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %2
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double %0, double %1, double %3,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f5(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f5:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; NOFMA-NEXT:xorps %xmm3, %xmm0
+; NOFMA-NEXT:xorps %xmm3, %xmm2
+; NOFMA-NEXT:mulss %xmm1,

[PATCH] D72820: Add pragma FP_CONTRACT support.

2020-01-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 238408.
pengfei added a comment.

Remove dead code.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72820/new/

https://reviews.llvm.org/D72820

Files:
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/constrained-math-builtins.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/CodeGen/ISDOpcodes.h
  llvm/include/llvm/IR/ConstrainedOps.def
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFMA.td
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -1,7 +1,339 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefixes=COMMON,NOFMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX1
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX512
+
+define float @f1(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f1:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f1:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %0
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(float %3, float %1, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f2(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f2:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorpd {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f2:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %0
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double %3, double %1, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f3(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f3:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f3:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %2
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(float %0, float %1, float %3,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f4(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f4:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:xorpd {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f4:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %2
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double %0, double %1, double %3,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f5(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f5:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; NOFMA-NEXT:xorps %xmm3, %xmm0
+; NOFMA-NEXT:xorps %xmm3, %xmm2
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f5:
+; FMA:  

[PATCH] D72820: Add pragma FP_CONTRACT support.

2020-01-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 238414.
pengfei marked an inline comment as done.
pengfei added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72820/new/

https://reviews.llvm.org/D72820

Files:
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/constrained-math-builtins.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/CodeGen/ISDOpcodes.h
  llvm/include/llvm/IR/ConstrainedOps.def
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -3,6 +3,104 @@
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
 
+; Verify constrained fmul and fadd aren't fused.
+define float @f11(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f11:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f11:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmulss %xmm1, %xmm0, %xmm0
+; FMA-NEXT:vaddss %xmm2, %xmm0, %xmm0
+; FMA-NEXT:retq
+entry:
+  %3 = call float @llvm.experimental.constrained.fmul.f32(float %0, float %1,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  %4 = call float @llvm.experimental.constrained.fadd.f32(float %3, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %4
+}
+
+; Verify constrained fmul and fadd aren't fused.
+define double @f12(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f12:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f12:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmulsd %xmm1, %xmm0, %xmm0
+; FMA-NEXT:vaddsd %xmm2, %xmm0, %xmm0
+; FMA-NEXT:retq
+entry:
+  %3 = call double @llvm.experimental.constrained.fmul.f64(double %0, double %1,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  %4 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %4
+}
+
+; Verify that fmuladd(3.5) isn't simplified when the rounding mode is
+; unknown.
+define float @f15() #0 {
+; NOFMA-LABEL: f15:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NOFMA-NEXT:movaps %xmm1, %xmm0
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm1, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f15:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FMA-NEXT:vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
+; FMA-NEXT:retq
+entry:
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(
+   float 3.5,
+   float 3.5,
+   float 3.5,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+; Verify that fmuladd(42.1) isn't simplified when the rounding mode is
+; unknown.
+define double @f16() #0 {
+; NOFMA-LABEL: f16:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movsd {{.*#+}} xmm1 = mem[0],zero
+; NOFMA-NEXT:movapd %xmm1, %xmm0
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm1, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f16:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FMA-NEXT:vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
+; FMA-NEXT:retq
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(
+   double 42.1,
+   double 42.1,
+   double 42.1,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double 

[PATCH] D72824: [X86] Add combination for fma and fneg on X86 under strict FP.

2020-01-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: craig.topper, andrew.w.kaylor, uweigand, RKSimon, 
LiuChen3.
pengfei added a project: LLVM.

X86 has instructions to calculate fma and fneg at the same time. But we combine 
the fneg and fma only when fneg is the source operand under strict FP.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D72824

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFMA.td
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -1,7 +1,271 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefixes=COMMON,NOFMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX1
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX512
+
+define float @f1(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f1:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:callq fmaf
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f1:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %0
+  %result = call float @llvm.experimental.constrained.fma.f32(float %3, float %1, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f2(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f2:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:callq fma
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f2:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %0
+  %result = call double @llvm.experimental.constrained.fma.f64(double %3, double %1, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f3(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f3:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:callq fmaf
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f3:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg float %2
+  %result = call float @llvm.experimental.constrained.fma.f32(float %0, float %1, float %3,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f4(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f4:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:callq fma
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f4:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+entry:
+  %3 = fneg double %2
+  %result = call double @llvm.experimental.constrained.fma.f64(double %0, double %1, double %3,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f5(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f5:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+

[PATCH] D72820: Add pragma FP_CONTRACT support.

2020-01-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 238417.
pengfei marked an inline comment as done.
pengfei added a comment.

Address review comment.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72820/new/

https://reviews.llvm.org/D72820

Files:
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/constrained-math-builtins.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/CodeGen/ISDOpcodes.h
  llvm/include/llvm/IR/ConstrainedOps.def
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -3,6 +3,104 @@
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
 
+; Verify constrained fmul and fadd aren't fused.
+define float @f11(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f11:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f11:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmulss %xmm1, %xmm0, %xmm0
+; FMA-NEXT:vaddss %xmm2, %xmm0, %xmm0
+; FMA-NEXT:retq
+entry:
+  %3 = call float @llvm.experimental.constrained.fmul.f32(float %0, float %1,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  %4 = call float @llvm.experimental.constrained.fadd.f32(float %3, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %4
+}
+
+; Verify constrained fmul and fadd aren't fused.
+define double @f12(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f12:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm2, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f12:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmulsd %xmm1, %xmm0, %xmm0
+; FMA-NEXT:vaddsd %xmm2, %xmm0, %xmm0
+; FMA-NEXT:retq
+entry:
+  %3 = call double @llvm.experimental.constrained.fmul.f64(double %0, double %1,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  %4 = call double @llvm.experimental.constrained.fadd.f64(double %3, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %4
+}
+
+; Verify that fmuladd(3.5) isn't simplified when the rounding mode is
+; unknown.
+define float @f15() #0 {
+; NOFMA-LABEL: f15:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NOFMA-NEXT:movaps %xmm1, %xmm0
+; NOFMA-NEXT:mulss %xmm1, %xmm0
+; NOFMA-NEXT:addss %xmm1, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f15:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FMA-NEXT:vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
+; FMA-NEXT:retq
+entry:
+  %result = call float @llvm.experimental.constrained.fmuladd.f32(
+   float 3.5,
+   float 3.5,
+   float 3.5,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+; Verify that fmuladd(42.1) isn't simplified when the rounding mode is
+; unknown.
+define double @f16() #0 {
+; NOFMA-LABEL: f16:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:movsd {{.*#+}} xmm1 = mem[0],zero
+; NOFMA-NEXT:movapd %xmm1, %xmm0
+; NOFMA-NEXT:mulsd %xmm1, %xmm0
+; NOFMA-NEXT:addsd %xmm1, %xmm0
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f16:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FMA-NEXT:vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + xmm0
+; FMA-NEXT:retq
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(
+   double 42.1,
+   double 42.1,
+   double 42.1,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %

[PATCH] D72824: [X86] Add combination for fma and fneg on X86 under strict FP.

2020-01-16 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 238425.
pengfei added a comment.

Add FMA4 tests. Thanks @RKSimon.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72824/new/

https://reviews.llvm.org/D72824

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFMA.td
  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
  llvm/test/CodeGen/X86/fp-intrinsics-fma.ll

Index: llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
===
--- llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics-fma.ll
@@ -1,7 +1,326 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefixes=COMMON,NOFMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA
-; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX1
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma4 < %s | FileCheck %s --check-prefixes=COMMON,FMA4
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefixes=COMMON,FMA,FMA-AVX512
+
+define float @f1(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f1:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:callq fmaf
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f1:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+;
+; FMA4-LABEL: f1:
+; FMA4:   # %bb.0: # %entry
+; FMA4-NEXT:vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:retq
+entry:
+  %3 = fneg float %0
+  %result = call float @llvm.experimental.constrained.fma.f32(float %3, float %1, float %2,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f2(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f2:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm0
+; NOFMA-NEXT:callq fma
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f2:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; FMA-NEXT:retq
+;
+; FMA4-LABEL: f2:
+; FMA4:   # %bb.0: # %entry
+; FMA4-NEXT:vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:retq
+entry:
+  %3 = fneg double %0
+  %result = call double @llvm.experimental.constrained.fma.f64(double %3, double %1, double %2,
+   metadata !"round.dynamic",
+   metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define float @f3(float %0, float %1, float %2) #0 {
+; NOFMA-LABEL: f3:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:callq fmaf
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f3:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+;
+; FMA4-LABEL: f3:
+; FMA4:   # %bb.0: # %entry
+; FMA4-NEXT:vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:retq
+entry:
+  %3 = fneg float %2
+  %result = call float @llvm.experimental.constrained.fma.f32(float %0, float %1, float %3,
+  metadata !"round.dynamic",
+  metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define double @f4(double %0, double %1, double %2) #0 {
+; NOFMA-LABEL: f4:
+; NOFMA:   # %bb.0: # %entry
+; NOFMA-NEXT:pushq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 16
+; NOFMA-NEXT:xorps {{.*}}(%rip), %xmm2
+; NOFMA-NEXT:callq fma
+; NOFMA-NEXT:popq %rax
+; NOFMA-NEXT:.cfi_def_cfa_offset 8
+; NOFMA-NEXT:retq
+;
+; FMA-LABEL: f4:
+; FMA:   # %bb.0: # %entry
+; FMA-NEXT:vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
+; FMA-NEXT:retq
+;
+; FMA4-LABEL: f4:
+; FMA4:   # %bb.0: # %entry
+; FMA4-NEXT:vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT:retq
+entry:
+  %3 = fneg double %2
+  %result = call double @llvm.experimental.constrained.fma.f64(d

[PATCH] D92837: [X86] Support tilezero intrinsic and c interface for AMX.

2020-12-08 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/Target/X86/X86ExpandPseudo.cpp:499
+MI.RemoveOperand(3); // Remove $tmmcfg
+for (int i = 2; i > 0; --i)
+  MI.RemoveOperand(i);

Can change to i = 3?



Comment at: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:4624
+  SDValue Chain = Node->getOperand(0);
+  SDValue Ops[] = {Node->getOperand(2), Node->getOperand(3), CFG, Chain};
+  MachineSDNode *CNode =

Why does tilezero need a chain?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92837/new/

https://reviews.llvm.org/D92837

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87981: [X86] AMX programming model.

2020-12-09 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.

LGTM. I think we can land this patch as a beginning. Cheers~


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87981/new/

https://reviews.llvm.org/D87981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92940: [X86] Convert fadd/fmul _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2020-12-11 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks for bringing this refactor.
I also verified that ICC and GCC both do the reduce math in a binary-tree way, 
though sometimes ICC has a different LSB from GCC and Clang.




Comment at: clang/lib/Headers/avx512fintrin.h:9559
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_pd(__m512d __V) {
   _mm512_mask_reduce_operator(max_pd);

Better to change min and max as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92940/new/

https://reviews.llvm.org/D92940

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93179: [X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2020-12-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Yes, it is using maxpd/minpd here. Sorry for missing the nan cases.
In fact, I have doubts about the behavior when using fast math with target 
intrinsics. In my opinion, target intrinsics are always associated with given 
instructions (reduce* are exceptions). So the behavior of intrinsics, e.g. 
respecting NaNs, signaling, rounding, exceptions etc., is consistent with their 
associated instructions. That said, the fast math flags won't change the 
behavior of intrinsics.
Assuming the above, I'm happy to set these expansions to fast math. Either keeping 
the existing implementation or the expansion LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93179/new/

https://reviews.llvm.org/D93179

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93078: [utils] Fix UpdateTestChecks case where 2 runs differ for last label

2020-12-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

What's the difference from the existing code? It looks to me that you just 
brought the warning out of the loop, right?




Comment at: llvm/utils/update_analyze_test_checks.py:128
   raw_tool_output, prefixes, func_dict, func_order, args.verbose, 
False, False)
-
+
+common.warn_on_failed_prefixes(func_dict)

redundant space.



Comment at: llvm/utils/update_analyze_test_checks.py:129
+
+common.warn_on_failed_prefixes(func_dict)
 is_in_function = False

Can we move these warnings to common.py?



Comment at: llvm/utils/update_cc_test_checks.py:269
 line2spell_and_mangled_list[k].append(v)
-
+
+common.warn_on_failed_prefixes(func_dict)

redundant space.



Comment at: llvm/utils/update_test_checks.py:120
   ti.args.function_signature, ti.args.check_attributes)
-
+
+common.warn_on_failed_prefixes(func_dict)

redundant space.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93078/new/

https://reviews.llvm.org/D93078

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93179: [X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2020-12-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D93179#2452932 , @spatel wrote:

> If we're going by existing behavior/compatibility, gcc/icc use packed ops too:
> https://godbolt.org/z/9jEhaW
> ...so there's an implicit 'nnan nsz' in these intrinsics (and that should be 
> documented in the header file (and file a bug for Intel's page at 
> https://software.intel.com/sites/landingpage/IntrinsicsGuide/ ?).

I agree. I have filed a bug internally for the intrinsics guide.
The link for reporting bugs on the intrinsics guide is broken. Please let me 
know if you find any bugs in the intrinsics guide. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93179/new/

https://reviews.llvm.org/D93179

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93078: [utils] Fix UpdateTestChecks case where 2 runs differ for last label

2020-12-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks.
`update_test_prefix.py` assumes the conflicting output. You may need to change 
its expectation as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93078/new/

https://reviews.llvm.org/D93078

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93078: [utils] Fix UpdateTestChecks case where 2 runs differ for last label

2020-12-15 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/utils/update_analyze_test_checks.py:129
+
+common.warn_on_failed_prefixes(func_dict)
 is_in_function = False

mtrofin wrote:
> pengfei wrote:
> > Can we move these warn to common.py?
> Come to think of it, maybe moving it to common.py was not quite ideal:
> - once a warning is issued in the middle of the list of RUN lines, it'll just 
> be re-issued next time around; we could warn and exit, but that'd be annoying 
> if there's another failure later in the RUN list.
> - if we do it at the end, we can additionally distinguish the case where a 
> prefix has an empty dict of funcs associated with it -> and the warning would 
> be "there are unused prefixes - please remove %s'. This is more discoverable 
> than llvm-lit -a, and also than relying on the user knowing to run 
> uplate_test_prefix.
> 
> I think that the benefits (discoverability, better, more concise warnings) 
> make the extra 2-3 lines worth it; and we already have tests for these other 
> tools - wdyt?
Makes sense to me. Thanks for continuing to improve it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93078/new/

https://reviews.llvm.org/D93078

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-21 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/IR/ConstantFold.cpp:539
+  if (V->isNullValue() && !DestTy->isX86_MMXTy() && !DestTy->isX86_AMXTy()
+  && opc != Instruction::AddrSpaceCast)
 return Constant::getNullValue(DestTy);

The operator should be at the end of the line.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:55
+  switch (II->getIntrinsicID()) {
+  default: {
+Row = II->getArgOperand(0);

I think we'd better check for exceptions. E.g.
```
default:
  llvm_unreachable("");
case Intrinsic::x86_tileloadd64_internal:
case Intrinsic::x86_tdpbssd_internal:
case Intrinsic::x86_tilestored64_internal:
  Row = II->getArgOperand(0);
  Col = II->getArgOperand(1);
  break;
```



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:114
+  Value *Row = nullptr, *Col = nullptr;
+  Use &U = *(Bitcast->use_begin());
+  unsigned OpNo = U.getOperandNo();

Why not check for empty, like line 157?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:193
+// %2 = load <256 x i32>, <256 x i32>* %addr, align 1024
+auto *II = cast(Src);
+Value *Row = II->getOperand(0);

Is it possible the x86_amx operand isn't from an AMX intrinsic, e.g.
```
%src = bitcast <256 x i32> %xxx to x86_amx
%2 = bitcast x86_amx %src to <256 x i32>
```



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:233
 bool X86LowerAMXType::visit() {
   bool C;
+  SmallVector DeadInsts;

Better move it to line 310.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:238
 for (Instruction &Inst : BB) {
-  LoadInst *LD = dyn_cast(&Inst);
-  // Check load instruction.
-  // %3 = load <256 x i32>, <256 x i32>* %1, align 64
-  if (LD) {
-FixedVectorType *VTy = dyn_cast(Inst.getType());
-if (!IsAMXType(VTy))
-  continue;
-LDSet.insert(&Inst);
+  if (!dyn_cast(&Inst))
 continue;

Better to reuse the cast result, e.g.
```
BitCastInst *BInst = dyn_cast<BitCastInst>(&Inst);
if (!BInst )
```
You can save several `cast<BitCastInst>(&Inst)` below.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:265
+// If the dst type is <256 x i32>*, it is valid intruction.
+// %0 = bitcast x86_amx* %tile to <256 x i32>*
+// %1 = load <256 x i32>, <256 x i32>* %0, align 64

Where's `x86_amx* %tile` from? Shouldn't it have been transferred to `x86_amx` 
before this bitcast if it exists?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:271
+LoadInst *LD = dyn_cast(Src);
+if (!LD || !LD->hasOneUser()) {
+  transformBitcast(cast(&Inst));

Maybe better to keep a duplicated `load` that calling `transformBitcast`. The 
same for line 285.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:286
+if (!Inst.hasOneUser()) {
+  transformBitcast(cast(&Inst));
+  DeadInsts.push_back(&Inst);

Why do we need to consider the case where <256 x i32> has more than one use?



Comment at: llvm/test/CodeGen/X86/AMX/amx-across-func.ll:89-91
 attributes #2 = { nounwind uwtable 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" 
"frame-pointer"="none" "less-precise-fpmad"="false" 
"min-legal-vector-width"="8192" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+amx-int8,+amx-tile,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="none" 
"less-precise-fpmad"="false" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" 
"target-features"="+amx-int8,+amx-tile,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #4 = { nounwind }

Better to remove these unused attributes. The same to other tests.



Comment at: llvm/test/CodeGen/X86/AMX/amx-type.ll:67
+
+define dso_local void @test_src_add(<256 x i32> %x, <256 x i32> %y, i16 %r, 
i16 %c, i8* %buf, i64 %s) #2 {
+; CHECK-LABEL: @test_src_add(

For this and the next test, we have chances to optimize to memcpy if we can 
make sure %s is constant 64.



Comment at: llvm/test/CodeGen/X86/AMX/amx-type.ll:103
+
 define dso_local void @test_load(i8* %in, i8* %out) local_unnamed_addr #2 {
 ; CHECK-LABEL: @test_load(

We don't need to check this case now, right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
 

[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-22 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:264
+  SmallVector DeadInsts;
+  SmallVector DeadBitcasts;
+

Maybe better to use BitCastInst?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:274
+  if (Bitcast->user_empty()) {
+DeadInsts.push_back(Bitcast);
 continue;

Why not put it in DeadBitcasts?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:280
+  Type *Ty = Bitcast->getType();
+  auto CanonicalizeBitcast = [&]() {
+bool Change = false;

Can we leave the canonicalize-bitcast cases to a separate patch? It's a bit 
complex here and I don't think it's a common case.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:420
   }
+  // Delete user first.
+  for (auto *Inst : DeadBitcasts)

This comment is for above code? Better move it up.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-23 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

> In my test case, it is transformed after Combine redundant instructions.

Can we disable it for AMX type? The pointer to AMX type is meaningless and may 
result in bad performance.




Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:300
+// If the dst type is <256 x i32>*, it is valid intruction.
+// %0 = bitcast x86_amx* %tile to <256 x i32>*
+// %1 = load <256 x i32>, <256 x i32>* %0, align 64

I don't see any chance of this happening. But we still need to handle the 
x86_amx* here if possible, right?
Maybe better to add an assertion for now.
```
cast(Src->getType())->isX86_AMXTy()
```


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-23 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D91927#2470818 , @LuoYuanke wrote:

> In D91927#2469977 , @pengfei wrote:
>
>>> In my test case, it is transformed after Combine redundant instructions.
>>
>> Can we disable it for AMX type? The pointer to AMX type is meaningless and 
>> may result in bad perfomance.
>
> Ok, I'll disable the transform for AMX type.

Good job.




Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:123
 
-  LoadMap[Inst] = std::make_pair(Lo, Hi);
+  auto *Tile = Bitcast->getOperand(0);
+  auto *II = cast(Tile);

Value



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:196
+// %2 = load <256 x i32>, <256 x i32>* %addr, align 1024
+auto *II = dyn_cast(Src);
+if (!II)

How about the `Tile` comes from tdpbssd?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:47
+  Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
+  auto AllocaAlignment = DL.getPrefTypeAlign(V256I32Ty);
+  unsigned AllocaAS = DL.getAllocaAddrSpace();

Currently, we don't have a HW type for v256i32. I think 64 bytes (512 bits) 
should be enough here.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:124
+  auto *II = cast(Tile);
+  // Tile is output from AMX intrinsic. The first operand of the
+  // intrinsic is row, the second operand of the intrinsic is column.

How about the `Tile` comes from tdpbssd?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:79
+// -->
+// %addr = alloca <256 x i32>, align 1024
+// store <256 x i32> %src, <256 x i32>* %addr, align 1024

LuoYuanke wrote:
> pengfei wrote:
> > Why the alignment not be 64?
> 1024 is conservatives, because vector require the alignment to be the vector 
> size. Here generate vector <256 x i32> load/store.
We don't need to align to 1024. 64 should be enough. The same for below 
comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-23 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:243
+}
+// If load has mutli-user, duplicate a amx load.
+// %src = load <256 x i32>, <256 x i32>* %addr, align 64

vector



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:312
+  SmallSet DeletedInst;
+  auto DeleteInst = [&](Instruction *Inst) {
+SmallVector DeadIs;

Why do we need to recursively delete them? I think deleting the nodes in DeadInsts 
is enough.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:124
+  auto *II = cast(Tile);
+  // Tile is output from AMX intrinsic. The first operand of the
+  // intrinsic is row, the second operand of the intrinsic is column.

LuoYuanke wrote:
> pengfei wrote:
> > How about the `Tile` comes from tdpbssd?
> We have a convention, when amx intrinsics define a x86_amx tile the first 2 
> operands is the shape of the defined tile. For tdpbssd, the intrinsics 
> operands are (m, n, k, ...). (m, n) is the shape of the produced tile.
Oh, yes. I missed that. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-12-24 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks for the refactors. Maybe better to wait for a few days to see if 
others have objections.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87981: [X86] AMX programming model prototype.

2020-11-04 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

You may still need to change the format according to the Lint suggestions.




Comment at: clang/lib/Headers/amxintrin.h:227
+
+/// This is new intrinsic interface
+typedef int _tile_data __attribute__((__vector_size__(1024), __aligned__(64)));

The comment is useless.



Comment at: clang/lib/Headers/amxintrin.h:243
+_tile_stored_internal(unsigned short m, unsigned short n, void *base,
+  int stride, _tile_data tile) {
+  return __builtin_ia32_tilestored64_internal(m, n, base,

The type is inconsistent with `__tile_stored`



Comment at: clang/lib/Headers/amxintrin.h:255
+__DEFAULT_FN_ATTRS_INT8
+void __tile_loadd(__tile *dst, const void *base, long long stride) {
+  dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);

Why not use `size_t`?



Comment at: clang/lib/Headers/amxintrin.h:266
+__DEFAULT_FN_ATTRS_INT8
+void __tile_stored(void *base, long long stride, __tile src) {
+  _tile_stored_internal(src.row, src.col, base, stride, src.tile);

The same here.



Comment at: clang/test/CodeGen/X86/amx_api.c:13
+  //CHECK-LABEL: @test_api
+  //CHECK: call <256 x i32> @llvm.x86.tileloadd64.internal
+  //CHECK: call <256 x i32> @llvm.x86.tdpbssd.internal

Should it check for 3 and only 3 `llvm.x86.tileloadd64.internal`?



Comment at: llvm/include/llvm/CodeGen/Passes.h:494
+
+  FunctionPass *createX86LowerAMXTypePass();
 } // End llvm namespace

Comments?



Comment at: llvm/include/llvm/CodeGen/TileShapeInfo.h:47
+  return true;
+if ((RowImm != InvalidImmShape) && (Shape.getRowImm() != InvalidImmShape) 
&&
+(ColImm != InvalidImmShape) && (Shape.getColImm() != InvalidImmShape)) 
{

You just need to check
`RowImm != InvalidImmShape && ColImm != InvalidImmShape`



Comment at: llvm/lib/Target/X86/X86TileConfig.cpp:117
+   FrameIdx, Offset)
+  .addImm(Imm);
+}

The format looks strange, I wonder why Lint didn't report it.



Comment at: llvm/test/CodeGen/X86/ipra-reg-usage.ll:6
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw 
$fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 
$cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 
$cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 
$dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 
$k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 
$st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 
$xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 
$xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 
$ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 
$ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 
$zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 
$zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 
$zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh 
$r11d $r11w $r11wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw 
$fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 
$bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 
$cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 
$dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 
$k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 
$st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 
$xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 
$xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 
$ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 
$ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 
$ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 
$zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 
$zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 
$r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
   call void @bar1()

LuoYuanke wrote:
> LuoYuanke wrote:
> > pengfei wrote:
> > > Why this patch affects the k registers?
> > This looks weird to me too.  The patch only adds "tmmcfg". I'll look into it 
> > later.
> I check the test case without my patch the k pair registers are clobbered. 
> But FileCheck only match the strings, so the test passes. I can also remove 
> "$k0_k1 $k2_k3 $k4_k5 $k6_k7" f

[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Craig's method sounds good.
@FreddyYe, why do we check AVX512BW instead of AVX512F? I saw that the SDM says it 
depends on AVX512F.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D90822#2375423 , @pengfei wrote:

> Craig's method sounds good.

I think it may result in a potential problem once we change the order of them in 
immintrin.h, so it would be better to use the first method.

  #if !(defined(_MSC_VER) || defined(SCE)) || __has_feature(modules) || \
  (defined(__AVX512VL__)  && defined(__AVX512BW__))


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D90822#2375445 , @craig.topper 
wrote:

> In D90822#2375425 , @pengfei wrote:
>
>> In D90822#2375423 , @pengfei wrote:
>>
>>> Craig's method sounds good.
>>
>> I think it may result in potential problem once we change the order of them 
>> in immintrin.h, it should be better to use the first method.
>>
>>   #if !(defined(_MSC_VER) || defined(SCE)) || __has_feature(modules) || \
>>   (defined(__AVX512VL__)  && defined(__AVX512BW__))
>
> But if the order changes then the code won't compile because _m512i or some 
> other type won't be defined.

I was thinking we may lose the chance to know that we changed them in the wrong 
order. But it should be OK since we have intrinsic tests. So I agree with you.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-04 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D90822#2375469 , @craig.topper 
wrote:

> In D90822#2375463 , @FreddyYe wrote:
>
>> In D90822#2375423 , @pengfei wrote:
>>
>>> Craig's method sounds good.
>>> @FreddyYe , Why we check AVX512BW instead of AVX512F. I saw SDM says it 
>>> depends on AVX512F.
>>
>> I was referring to the old implementation and test case. Seems like an error 
>> introduced before.
>
> mmask64 requires avx512bw. And 32xi16 and 64xi8 aren’t well supported without 
> avx512bw. We used to split them always, but we only split specific 
> instructions now.
>
> We also need avx512bw for the selectb and selectw builtins used for masking.

I see. Thank you!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/gfniintrin.h:24
+#ifdef __AVXINTRIN_H
+#ifdef __AVX512BWINTRIN_H
+#ifdef __AVX512VLINTRIN_H

`__AVX512VLBWINTRIN_H` is better.



Comment at: clang/lib/Headers/gfniintrin.h:52
 U, A, B, I)
-
+#endif
 

missing comments



Comment at: clang/lib/Headers/gfniintrin.h:190
 }
+#endif
 

comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/gfniintrin.h:104-111
 #define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), 
   \
 (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I),
   \
 (__v64qi)(__m512i)(S))
 
 #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
   
(__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), 
   \

These 2 functions need to move to __AVX512BWINTRIN_H



Comment at: clang/lib/Headers/gfniintrin.h:131-138
 #define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
(__m512i)__builtin_ia32_selectb_512((__mmask64)(U), 
   \
 (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I),   
   \
 (__v64qi)(__m512i)(S))
 
 #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
   (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), 
  \

These 2 functions need to move to __AVX512BWINTRIN_H



Comment at: clang/lib/Headers/gfniintrin.h:170-183
 static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
 _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_selectb_512(__U,
   (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
   (__v64qi) __S);
 }

These 2 functions need to move to __AVX512BWINTRIN_H


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90822: [X86] use macros to split GFNI intrinsics into different kinds

2020-11-05 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.

LGTM. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90822/new/

https://reviews.llvm.org/D90822

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/AST/Stmt.cpp:795
+  SmallVector Pieces;
+  AsmStr.split(Pieces, "\n\t");
+  std::string MSAsmString;

Can we always assume the separator is `\n\t`?



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3083
+  }
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();

You just need to check `ForcedVEXEncoding != VEXEncoding_Default`.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3084
+  if (IsPrefix) {
+NameLoc = Parser.getTok().getLoc();
+if (getLexer().isNot(AsmToken::Identifier))

Unused assignment. It may be supposed to be used on line 3086.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

> 2. Delete IsPrefix parameter, and delete 'break', so that we won't check 
> prefix again. I am not sure if this is right. Att format can allow two prefix 
> and using the last one as the finally encoding prefix. I think this may not 
> be the original intention of the design.

It allows more than two, right? Like `{vex}{vex2}{vex3} instruction`. I think 
it should be considered a bug for AT&T syntax.




Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:3079
+  if (ForcedVEXEncoding != VEXEncoding_Default) {
+if (getLexer().isNot(AsmToken::Identifier))
+  return Error(Parser.getTok().getLoc(), "Expected identifier");

Do you need to eat the prefix here?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-11-17 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks.
You'd better wait one or two days to see if other people object.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D80344: [Windows SEH]: HARDWARE EXCEPTION HANDLING (MSVC -EHa) - Part 1

2020-11-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Do we need to consider FP exceptions in _try block?




Comment at: clang/include/clang/Driver/Options.td:886
   HelpText<"Enable C++ exceptions">, Flags<[CC1Option]>;
+def feh_asynch: Flag<["-"], "feh-asynch">, Group,
+  HelpText<"Enable EH Asynchronous exceptions">, Flags<[CC1Option]>;

It's better to follow alphabetical order for it and line 1531.



Comment at: clang/lib/CodeGen/CGCleanup.cpp:1287
+  PopCleanupBlock();
+else
+{

Move `{` to the same line as `else`, and it is better to add curly brackets for 
`PopCleanupBlock();` as well.



Comment at: clang/test/CodeGen/windows-seh-EHa-TryInFinally.cpp:1
+// RUN: %clang_cc1 -triple x86_64-windows -feh-asynch -fcxx-exceptions 
-fexceptions -fms-extensions -x c++ -Wno-implicit-function-declaration -S 
-emit-llvm %s -o - | FileCheck %s
+

Should this be a C file? I saw that LangRef says they are used for C functions.



Comment at: llvm/docs/LangRef.rst:11534
+'``llvm.seh.try.begin``' and '``llvm.seh.try.end``' Intrinsics
+
+

Keep the same length with above line.



Comment at: llvm/docs/LangRef.rst:11560
+'``llvm.seh.scope.begin``' and '``llvm.seh.scope.end``' Intrinsics
+
+

Keep the same length with above line.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80344/new/

https://reviews.llvm.org/D80344

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89102: [X86] Add HRESET instruction.

2020-10-09 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: craig.topper, RKSimon, LuoYuanke.
Herald added subscribers: llvm-commits, cfe-commits, dang, hiraditya, mgorny.
Herald added projects: clang, LLVM.
pengfei requested review of this revision.

For more details about these instructions, please refer to the latest ISE 
document: 
https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D89102

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/hresetintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/test/CodeGen/x86-hreset-intrin.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFormats.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/MC/Disassembler/X86/x86-32.txt
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/x86-32-coverage.s
  llvm/test/MC/X86/x86-64.s

Index: llvm/test/MC/X86/x86-64.s
===
--- llvm/test/MC/X86/x86-64.s
+++ llvm/test/MC/X86/x86-64.s
@@ -2014,3 +2014,7 @@
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
 tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/X86/x86-32-coverage.s
===
--- llvm/test/MC/X86/x86-32-coverage.s
+++ llvm/test/MC/X86/x86-32-coverage.s
@@ -10891,4 +10891,8 @@
 
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
-tdcall
\ No newline at end of file
+tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -712,3 +712,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/test/MC/Disassembler/X86/x86-32.txt
===
--- llvm/test/MC/Disassembler/X86/x86-32.txt
+++ llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -1000,3 +1000,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/lib/Target/X86/X86Subtarget.h
===
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -401,6 +401,9 @@
   /// Processor support key locker wide instructions
   bool HasWIDEKL = false;
 
+  /// Processor supports HRESET instruction
+  bool HasHRESET = false;
+
   /// Processor supports SERIALIZE instruction
   bool HasSERIALIZE = false;
 
@@ -736,6 +739,7 @@
   bool hasENQCMD() const { return HasENQCMD; }
   bool hasKL() const { return HasKL; }
   bool hasWIDEKL() const { return HasWIDEKL; }
+  bool hasHRESET() const { return HasHRESET; }
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
Index: llvm/lib/Target/X86/X86InstrInfo.td
===
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -971,6 +971,7 @@
 def HasENQCMD: Predicate<"Subtarget->hasENQCMD()">;
 def HasKL: Predicate<"Subtarget->hasKL()">;
 def HasWIDEKL: Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET: Predicate<"Subtarget->hasHRESET()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
 def HasTSXLDTRK  : Predicate<"Subtarget->hasTSXLDTRK()">;
 def HasAMXTILE   : Predicate<"Subtarget->hasAMXTILE()">;
@@ -2911,6 +2912,13 @@
 def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
 def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
 
+//===--===//
+// HRESET Instruction
+//
+let Uses = [EAX] in
+  def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+   Requires<[HasHRESET]>, TAXS;
+
 //===--===//
 // SERIALIZE Instruction
 //
Index: llvm/lib/Target/X86/X86InstrFormats.td
===
--- llvm/lib/Target/X86/X86InstrFormats.td
+++ llvm/lib/Target/X86/X86InstrFormats.td
@@ -216,6 +216,7 @@
 class TAPS :

[PATCH] D89102: [X86] Add HRESET instruction.

2020-10-09 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei marked an inline comment as done.
pengfei added a comment.

Thanks for the review.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89102/new/

https://reviews.llvm.org/D89102

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89102: [X86] Add HRESET instruction.

2020-10-09 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 297234.
pengfei added a comment.

Address review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89102/new/

https://reviews.llvm.org/D89102

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/hresetintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/test/CodeGen/x86-hreset-intrin.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFormats.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/MC/Disassembler/X86/x86-32.txt
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/x86-32-coverage.s
  llvm/test/MC/X86/x86-64.s

Index: llvm/test/MC/X86/x86-64.s
===
--- llvm/test/MC/X86/x86-64.s
+++ llvm/test/MC/X86/x86-64.s
@@ -2014,3 +2014,7 @@
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
 tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/X86/x86-32-coverage.s
===
--- llvm/test/MC/X86/x86-32-coverage.s
+++ llvm/test/MC/X86/x86-32-coverage.s
@@ -10891,4 +10891,8 @@
 
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
-tdcall
\ No newline at end of file
+tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -712,3 +712,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/test/MC/Disassembler/X86/x86-32.txt
===
--- llvm/test/MC/Disassembler/X86/x86-32.txt
+++ llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -1000,3 +1000,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/lib/Target/X86/X86Subtarget.h
===
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -401,6 +401,9 @@
   /// Processor support key locker wide instructions
   bool HasWIDEKL = false;
 
+  /// Processor supports HRESET instruction
+  bool HasHRESET = false;
+
   /// Processor supports SERIALIZE instruction
   bool HasSERIALIZE = false;
 
@@ -736,6 +739,7 @@
   bool hasENQCMD() const { return HasENQCMD; }
   bool hasKL() const { return HasKL; }
   bool hasWIDEKL() const { return HasWIDEKL; }
+  bool hasHRESET() const { return HasHRESET; }
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
Index: llvm/lib/Target/X86/X86InstrInfo.td
===
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -971,6 +971,7 @@
 def HasENQCMD: Predicate<"Subtarget->hasENQCMD()">;
 def HasKL: Predicate<"Subtarget->hasKL()">;
 def HasWIDEKL: Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET: Predicate<"Subtarget->hasHRESET()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
 def HasTSXLDTRK  : Predicate<"Subtarget->hasTSXLDTRK()">;
 def HasAMXTILE   : Predicate<"Subtarget->hasAMXTILE()">;
@@ -2911,6 +2912,13 @@
 def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
 def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
 
+//===--===//
+// HRESET Instruction
+//
+let Uses = [EAX], SchedRW = [WriteSystem] in
+  def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+   Requires<[HasHRESET]>, TAXS;
+
 //===--===//
 // SERIALIZE Instruction
 //
Index: llvm/lib/Target/X86/X86InstrFormats.td
===
--- llvm/lib/Target/X86/X86InstrFormats.td
+++ llvm/lib/Target/X86/X86InstrFormats.td
@@ -216,6 +216,7 @@
 class TAPS : TA { Prefix OpPrefix = PS; }
 class TAPD : TA { Prefix OpPrefix = PD; }
 class TAXD : TA { Prefix OpPrefix = XD; }
+class TAXS : TA { Prefix OpPrefix = XS; }
 class VEX{ Encoding OpEnc = EncVEX; }
 class VEX_W{ bit HasV

[PATCH] D89102: [X86] Add HRESET instruction.

2020-10-10 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 297402.
pengfei added a comment.

Rebased.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89102/new/

https://reviews.llvm.org/D89102

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/hresetintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/test/CodeGen/x86-hreset-intrin.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFormats.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/MC/Disassembler/X86/x86-32.txt
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/x86-32-coverage.s
  llvm/test/MC/X86/x86-64.s

Index: llvm/test/MC/X86/x86-64.s
===
--- llvm/test/MC/X86/x86-64.s
+++ llvm/test/MC/X86/x86-64.s
@@ -2014,3 +2014,7 @@
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
 tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/X86/x86-32-coverage.s
===
--- llvm/test/MC/X86/x86-32-coverage.s
+++ llvm/test/MC/X86/x86-32-coverage.s
@@ -10891,4 +10891,8 @@
 
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
-tdcall
\ No newline at end of file
+tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -712,3 +712,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/test/MC/Disassembler/X86/x86-32.txt
===
--- llvm/test/MC/Disassembler/X86/x86-32.txt
+++ llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -1000,3 +1000,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/lib/Target/X86/X86Subtarget.h
===
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -401,6 +401,9 @@
   /// Processor support key locker wide instructions
   bool HasWIDEKL = false;
 
+  /// Processor supports HRESET instruction
+  bool HasHRESET = false;
+
   /// Processor supports SERIALIZE instruction
   bool HasSERIALIZE = false;
 
@@ -736,6 +739,7 @@
   bool hasENQCMD() const { return HasENQCMD; }
   bool hasKL() const { return HasKL; }
   bool hasWIDEKL() const { return HasWIDEKL; }
+  bool hasHRESET() const { return HasHRESET; }
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
Index: llvm/lib/Target/X86/X86InstrInfo.td
===
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -971,6 +971,7 @@
 def HasENQCMD: Predicate<"Subtarget->hasENQCMD()">;
 def HasKL: Predicate<"Subtarget->hasKL()">;
 def HasWIDEKL: Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET: Predicate<"Subtarget->hasHRESET()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
 def HasTSXLDTRK  : Predicate<"Subtarget->hasTSXLDTRK()">;
 def HasAMXTILE   : Predicate<"Subtarget->hasAMXTILE()">;
@@ -2911,6 +2912,13 @@
 def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
 def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
 
+//===--===//
+// HRESET Instruction
+//
+let Uses = [EAX], SchedRW = [WriteSystem] in
+  def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+   Requires<[HasHRESET]>, TAXS;
+
 //===--===//
 // SERIALIZE Instruction
 //
Index: llvm/lib/Target/X86/X86InstrFormats.td
===
--- llvm/lib/Target/X86/X86InstrFormats.td
+++ llvm/lib/Target/X86/X86InstrFormats.td
@@ -216,6 +216,7 @@
 class TAPS : TA { Prefix OpPrefix = PS; }
 class TAPD : TA { Prefix OpPrefix = PD; }
 class TAXD : TA { Prefix OpPrefix = XD; }
+class TAXS : TA { Prefix OpPrefix = XS; }
 class VEX{ Encoding OpEnc = EncVEX; }
 class VEX_W{ bit HasVEX_W = 1; }
 cla

[PATCH] D89102: [X86] Add HRESET instruction.

2020-10-12 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG412cdcf2edf2: [X86] Add HRESET instruction. (authored by 
pengfei).

Changed prior to commit:
  https://reviews.llvm.org/D89102?vs=297402&id=297732#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89102/new/

https://reviews.llvm.org/D89102

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/hresetintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/test/CodeGen/x86-hreset-intrin.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrFormats.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/MC/Disassembler/X86/x86-32.txt
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/x86-32-coverage.s
  llvm/test/MC/X86/x86-64.s

Index: llvm/test/MC/X86/x86-64.s
===
--- llvm/test/MC/X86/x86-64.s
+++ llvm/test/MC/X86/x86-64.s
@@ -2014,3 +2014,7 @@
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
 tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/X86/x86-32-coverage.s
===
--- llvm/test/MC/X86/x86-32-coverage.s
+++ llvm/test/MC/X86/x86-32-coverage.s
@@ -10891,4 +10891,8 @@
 
 // CHECK: tdcall
 // CHECK: encoding: [0x66,0x0f,0x01,0xcc]
-tdcall
\ No newline at end of file
+tdcall
+
+// CHECK: hreset
+// CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
+hreset $1
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -712,3 +712,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/test/MC/Disassembler/X86/x86-32.txt
===
--- llvm/test/MC/Disassembler/X86/x86-32.txt
+++ llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -1000,3 +1000,6 @@
 
 #CHECK: tdcall
 0x66 0x0f 0x01 0xcc
+
+# CHECK: hreset $1
+0xf3 0x0f 0x3a 0xf0 0xc0 0x01
Index: llvm/lib/Target/X86/X86Subtarget.h
===
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -401,6 +401,9 @@
   /// Processor support key locker wide instructions
   bool HasWIDEKL = false;
 
+  /// Processor supports HRESET instruction
+  bool HasHRESET = false;
+
   /// Processor supports SERIALIZE instruction
   bool HasSERIALIZE = false;
 
@@ -736,6 +739,7 @@
   bool hasENQCMD() const { return HasENQCMD; }
   bool hasKL() const { return HasKL; }
   bool hasWIDEKL() const { return HasWIDEKL; }
+  bool hasHRESET() const { return HasHRESET; }
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
Index: llvm/lib/Target/X86/X86InstrInfo.td
===
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -972,6 +972,7 @@
 def HasENQCMD: Predicate<"Subtarget->hasENQCMD()">;
 def HasKL: Predicate<"Subtarget->hasKL()">;
 def HasWIDEKL: Predicate<"Subtarget->hasWIDEKL()">;
+def HasHRESET: Predicate<"Subtarget->hasHRESET()">;
 def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">;
 def HasTSXLDTRK  : Predicate<"Subtarget->hasTSXLDTRK()">;
 def HasAMXTILE   : Predicate<"Subtarget->hasAMXTILE()">;
@@ -2913,6 +2914,13 @@
 def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
 def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
 
+//===--===//
+// HRESET Instruction
+//
+let Uses = [EAX], SchedRW = [WriteSystem] in
+  def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+   Requires<[HasHRESET]>, TAXS;
+
 //===--===//
 // SERIALIZE Instruction
 //
Index: llvm/lib/Target/X86/X86InstrFormats.td
===
--- llvm/lib/Target/X86/X86InstrFormats.td
+++ llvm/lib/Target/X86/X86InstrFormats.td
@@ -216,6 +216,7 @@
 class TAPS : TA { Prefix OpPrefix = P

[PATCH] D89184: Support complex target features combinations

2020-10-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

> D89105   appears to use only `"avx512vl , 
> avx512vnni | avxvnni"`.
> Does it mean `(avx512vl , avx512vnni) | avxvnni` or `avx512vl , (avx512vnni | 
> avxvnni)` ?

We need to express the combination `(avx512vl , avx512vnni) | avxvnni`, but the 
previous code turns it into `avx512vl , (avx512vnni | avxvnni)`.
With this patch, `"avx512vl , avx512vnni | avxvnni"` is parsed as `(avx512vl 
, avx512vnni) | avxvnni`, because `","` is always given priority.
I agree with @echristo that we do need to add some comments for that.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D80344: [Windows SEH]: HARDWARE EXCEPTION HANDLING (MSVC -EHa) - Part 1

2020-11-22 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D80344#2407305 , @tentzen wrote:

> In D80344#2407250 , @pengfei wrote:
>
>> Do we need to consider FP exceptions in _try block?
>
> Yes, FP exception is handled as long as FP exceptions are not disabled (Ex 
> via _controlfp() runtime) and FP exception code is filtered & handled via 
> ___except() statement (Ex, 
> ___except(GetExceptionCode()==EXCEPTION_FLT_INEXACT_RESULT)).

I see. If this is the case, you may need to assign FPE_Strict to the _try block 
to preserve the order of FP instructions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80344/new/

https://reviews.llvm.org/D80344

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-11-24 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/IR/DataLayout.cpp:819
+  case Type::X86_AMXTyID:
+return Align(64);
   default:

Should be 512 bits?



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:79
+// -->
+// %addr = alloca <256 x i32>, align 1024
+// store <256 x i32> %src, <256 x i32>* %addr, align 1024

Why isn't the alignment 64?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91927: [X86] Add x86_amx type for intel AMX.

2020-11-24 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:164
+}
+// %src = call x86_amx @llvm.x86.tileloadd64.internal(%row, %col, 
%addr,
+// %stride);

`%src` is not used here.



Comment at: llvm/utils/TableGen/IntrinsicEmitter.cpp:252
+  IIT_V256 = 50,
+  IIT_AMX  = 51,
 };

Remove `,`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91927/new/

https://reviews.llvm.org/D91927

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89664: [X86] Add missing code in D89102 [X86] Add HRESET instruction.

2020-10-18 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: LuoYuanke, FreddyYe, RKSimon, craig.topper.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
pengfei requested review of this revision.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D89664

Files:
  clang/docs/ReleaseNotes.rst
  clang/test/Preprocessor/predefined-arch-macros.c
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td


Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -767,6 +767,7 @@
   FeatureCLDEMOTE,
   FeatureWAITPKG,
   FeaturePTWRITE,
+  FeatureHRESET,
   FeatureTSXLDTRK,
   FeatureENQCMD,
   FeatureSHSTK,
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -204,7 +204,8 @@
 FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
 FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
 FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
-FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG;
+FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG |
+FeatureHRESET;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1664,6 +1664,7 @@
 // CHECK_SPR_M32: #define __F16C__ 1
 // CHECK_SPR_M32: #define __FMA__ 1
 // CHECK_SPR_M32: #define __GFNI__ 1
+// CHECK_SPR_M32: #define __HRESET__ 1
 // CHECK_SPR_M32: #define __INVPCID__ 1
 // CHECK_SPR_M32: #define __LZCNT__ 1
 // CHECK_SPR_M32: #define __MMX__ 1
@@ -1733,6 +1734,7 @@
 // CHECK_SPR_M64: #define __F16C__ 1
 // CHECK_SPR_M64: #define __FMA__ 1
 // CHECK_SPR_M64: #define __GFNI__ 1
+// CHECK_SPR_M64: #define __HRESET__ 1
 // CHECK_SPR_M64: #define __INVPCID__ 1
 // CHECK_SPR_M64: #define __LZCNT__ 1
 // CHECK_SPR_M64: #define __MMX__ 1
Index: clang/docs/ReleaseNotes.rst
===
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -200,6 +200,8 @@
   implies -mtune=. -mtune=generic is the default with no -march or -mtune
   specified.
 
+- Support for ISA HRESET has been added.
+
 Internal API Changes
 
 


Index: llvm/lib/Target/X86/X86.td
===
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -767,6 +767,7 @@
   FeatureCLDEMOTE,
   FeatureWAITPKG,
   FeaturePTWRITE,
+  FeatureHRESET,
   FeatureTSXLDTRK,
   FeatureENQCMD,
   FeatureSHSTK,
Index: llvm/lib/Support/X86TargetParser.cpp
===
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -204,7 +204,8 @@
 FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
 FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
 FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
-FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG;
+FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureWAITPKG |
+FeatureHRESET;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
Index: clang/test/Preprocessor/predefined-arch-macros.c
===
--- clang/test/Preprocessor/predefined-arch-macros.c
+++ clang/test/Preprocessor/predefined-arch-macros.c
@@ -1664,6 +1664,7 @@
 // CHECK_SPR_M32: #define __F16C__ 1
 // CHECK_SPR_M32: #define __FMA__ 1
 // CHECK_SPR_M32: #define __GFNI__ 1
+// CHECK_SPR_M32: #define __HRESET__ 1
 // CHECK_SPR_M32: #define __INVPCID__ 1
 // CHECK_SPR_M32: #define __LZCNT__ 1
 // CHECK_SPR_M32: #define __MMX__ 1
@@ -1733,6 +1734,7 @@
 // CHECK_SPR_M64: #define __F16C__ 1
 // CHECK_SPR_M64: #define __FMA__ 1
 // CHECK_SPR_M64: #define __GFNI__ 

[PATCH] D89301: [X86] Add user-level interrupt instructions

2020-10-18 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

You need to add a test for the UINTR macro in 
clang/test/Preprocessor/predefined-arch-macros.c for SapphireRapids.




Comment at: llvm/docs/ReleaseNotes.rst:117
   the target CPU.
-* Support for ISA HRESET has been added.
+* Support for HRESET and UINTR instructions has been added.
 

Maybe this needs to be added to the Clang release notes too.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89301/new/

https://reviews.llvm.org/D89301

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89664: [X86] Add missing code in D89102 [X86] Add HRESET instruction.

2020-10-20 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei abandoned this revision.
pengfei added a comment.

HRESET is not supported on Sapphire Rapids. Thanks for pointing it out.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89664/new/

https://reviews.llvm.org/D89664

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89184: Support complex target features combinations

2020-10-21 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
Herald added a subscriber: dexonsmith.

LGTM. But I suggest you wait 1 or 2 days to see whether other reviewers 
object.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89184/new/

https://reviews.llvm.org/D89184

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D89301: [X86] Add User Interrupts(UINTR) instructions

2020-10-22 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGbe39a6fe6fc6: [X86] Add User Interrupts(UINTR) instructions 
(authored by tianqing, committed by pengfei).

Changed prior to commit:
  https://reviews.llvm.org/D89301?vs=299317&id=299902#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D89301/new/

https://reviews.llvm.org/D89301

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86_64.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/uintrintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/test/CodeGen/X86/x86-uintr-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/predefined-arch-macros.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/lib/Target/X86/X86ISelLowering.h
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/uintr-intrinsics.ll
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/x86-64.s

Index: llvm/test/MC/X86/x86-64.s
===
--- llvm/test/MC/X86/x86-64.s
+++ llvm/test/MC/X86/x86-64.s
@@ -2018,3 +2018,35 @@
 // CHECK: hreset
 // CHECK: encoding: [0xf3,0x0f,0x3a,0xf0,0xc0,0x01]
 hreset $1
+
+// CHECK: uiret
+// CHECK: encoding: [0xf3,0x0f,0x01,0xec]
+uiret
+
+// CHECK: clui
+// CHECK: encoding: [0xf3,0x0f,0x01,0xee]
+clui
+
+// CHECK: stui
+// CHECK: encoding: [0xf3,0x0f,0x01,0xef]
+stui
+
+// CHECK: testui
+// CHECK: encoding: [0xf3,0x0f,0x01,0xed]
+testui
+
+// CHECK: senduipi %rax
+// CHECK: encoding: [0xf3,0x0f,0xc7,0xf0]
+senduipi %rax
+
+// CHECK: senduipi %rdx
+// CHECK: encoding: [0xf3,0x0f,0xc7,0xf2]
+senduipi %rdx
+
+// CHECK: senduipi %r8
+// CHECK: encoding: [0xf3,0x41,0x0f,0xc7,0xf0]
+senduipi %r8
+
+// CHECK: senduipi %r13
+// CHECK: encoding: [0xf3,0x41,0x0f,0xc7,0xf5]
+senduipi %r13
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -715,3 +715,27 @@
 
 # CHECK: hreset $1
 0xf3 0x0f 0x3a 0xf0 0xc0 0x01
+
+# CHECK: uiret
+0xf3,0x0f,0x01,0xec
+
+# CHECK: clui
+0xf3,0x0f,0x01,0xee
+
+# CHECK: stui
+0xf3,0x0f,0x01,0xef
+
+# CHECK: testui
+0xf3,0x0f,0x01,0xed
+
+# CHECK: senduipi %rax
+0xf3,0x0f,0xc7,0xf0
+
+# CHECK: senduipi %rdx
+0xf3,0x0f,0xc7,0xf2
+
+# CHECK: senduipi %r8
+0xf3,0x41,0x0f,0xc7,0xf0
+
+# CHECK: senduipi %r13
+0xf3,0x41,0x0f,0xc7,0xf5
Index: llvm/test/CodeGen/X86/uintr-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/X86/uintr-intrinsics.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+uintr | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+uintr | FileCheck %s --check-prefix=X32
+
+define i8 @test_uintr(i64 %arg) {
+; X64-LABEL: test_uintr:
+; X64:   # %bb.0: # %entry
+; X64-NEXT:clui
+; X64-NEXT:stui
+; X64-NEXT:senduipi %rdi
+; X64-NEXT:testui
+; X64-NEXT:setb %al
+; X64-NEXT:retq
+
+; X32-LABEL: test_uintr:
+; X32:   # %bb.0: # %entry
+; X32-NEXT:clui
+; X32-NEXT:stui
+; X32-NEXT:senduipi %rdi
+; X32-NEXT:testui
+; X32-NEXT:setb %al
+; X32-NEXT:retq
+entry:
+  call void @llvm.x86.clui()
+  call void @llvm.x86.stui()
+  call void @llvm.x86.senduipi(i64 %arg)
+  %0 = call i8 @llvm.x86.testui()
+  ret i8 %0
+}
+
+declare void @llvm.x86.clui()
+declare void @llvm.x86.stui()
+declare i8 @llvm.x86.testui()
+declare void @llvm.x86.senduipi(i64 %arg)
Index: llvm/lib/Target/X86/X86Subtarget.h
===
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -415,6 +415,9 @@
   bool HasAMXBF16 = false;
   bool HasAMXINT8 = false;
 
+  /// Processor supports User Level Interrupt instructions
+  bool HasUINTR = false;
+
   /// Processor has a single uop BEXTR implementation.
   bool HasFastBEXTR = false;
 
@@ -742,6 +745,7 @@
   bool hasHRESET() const { return HasHRESET; }
   bool hasSERIALIZE() const { return HasSERIALIZE; }
   bool hasTSXLDTRK() const { return HasTSXLDTRK; }
+  bool hasUINTR() const { return HasUINTR; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
   bool useRetpoline

[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/CodeGen/X86/att-inline-asm-prefix.c:12
+// CHECK: {evex} vcvtps2pd %xmm0, %xmm1
+"{vex} vcvtps2pd %xmm0, %xmm1\n\t"
+"{vex2} vcvtps2pd %xmm0, %xmm1\n\t"

It would be better to add a test case with no prefix too.



Comment at: llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h:57-70
 IP_NO_PREFIX = 0,
-IP_HAS_OP_SIZE = 1,
-IP_HAS_AD_SIZE = 2,
-IP_HAS_REPEAT_NE = 4,
-IP_HAS_REPEAT = 8,
-IP_HAS_LOCK = 16,
-IP_HAS_NOTRACK = 32,
-IP_USE_VEX3 = 64,
-IP_USE_DISP8 = 128,
-IP_USE_DISP32 = 256,
+IP_HAS_OP_SIZE = 1U << 0,
+IP_HAS_AD_SIZE = 1U << 1,
+IP_HAS_REPEAT_NE = 1U << 2,
+IP_HAS_REPEAT = 1U << 3,
+IP_HAS_LOCK = 1U << 4,
+IP_HAS_NOTRACK = 1U << 5,

Can we make these `= 1U <<` aligned?



Comment at: llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp:352
+  if (Flags & X86::Force_VEXEncoding)
+O << "\t{vex}";
+  else if (Flags & X86::Force_VEX2Encoding)

`"\t{vex}\t"` ?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90009: [X86] VEX/EVEX prefix doesn't work for inline assembly.

2020-10-22 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp:2824
 ForcedVEXEncoding = VEXEncoding_VEX;
+  else if (Prefix == "vex2")
+ForcedVEXEncoding = VEXEncoding_VEX2;

I think it's reasonable if we generate "{vex}" for input "{vex2}"


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90009/new/

https://reviews.llvm.org/D90009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87528: Enable '#pragma STDC FENV_ACCESS' in frontend

2020-10-27 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

I agree with Craig. Emitting constrained intrinsics on an unsupported target 
may result in problems. It's better to check whether the target supports them 
and prevent this in the front end.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87528/new/

https://reviews.llvm.org/D87528

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87981: [X86] AMX programming model prototype.

2020-10-27 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/amxintrin.h:69
 
+#undef __DEFAULT_FN_ATTRS
+

Better to use `__DEFAULT_FN_ATTRS_TILE`, `__DEFAULT_FN_ATTRS_INT8`, then you 
don't need to undef it in the middle.



Comment at: clang/lib/Headers/amxintrin.h:230
+static __inline__ _tile_data __DEFAULT_FN_ATTRS
+_tile_loadd_internal(short m, short n, const void *base, int stride) {
+  return __builtin_ia32_tileloadd64_internal(m, n, base,

How about using the unsigned type?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87981/new/

https://reviews.llvm.org/D87981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D87981: [X86] AMX programming model prototype.

2020-10-28 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/amxintrin.h:259
+__DEFAULT_FN_ATTRS
+void __tile_loadd(__tile *dst, const void *base, long stride) {
+  dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);

unsigned long long?



Comment at: llvm/include/llvm/CodeGen/TileShapeInfo.h:9
+//
+// This file defines TileShapeInfo for AMX.
+//

Do you want to use `TileShapeInfo` for the class name, or did you forget to 
change the comment to `ShapeT`?



Comment at: llvm/lib/IR/Function.cpp:779
 }
 
 /// IIT_Info - These are enumerators that describe the entries returned by the

There's no new change here. This file should be reverted.



Comment at: llvm/test/CodeGen/X86/ipra-reg-usage.ll:6
 define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw 
$fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $bnd0 $bnd1 $bnd2 $bnd3 $cr0 
$cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 
$cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 
$dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 
$k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 
$st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 
$xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 
$xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 
$ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 
$ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 
$zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 
$zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 
$zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh 
$r11d $r11w $r11wh
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $fpcw 
$fpsw $fs $gs $hip $ip $mxcsr $rip $riz $ss $ssp $tmmcfg $bnd0 $bnd1 $bnd2 
$bnd3 $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 
$cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 
$dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 
$k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 
$st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 
$xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 
$xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 
$ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 
$ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 
$ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 
$zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 
$zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 
$r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
   call void @bar1()

Why does this patch affect the k registers?



Comment at: llvm/test/CodeGen/X86/opt-pipeline.ll:122
 ; CHECK-NEXT:   X86 WinAlloca Expander
+; CHECK-NEXT:   MachineDominator Tree Construction
+; CHECK-NEXT:   Tile Register Pre-configure

Why did the order of "MachineDominator Tree Construction" change?



Comment at: llvm/utils/TableGen/IntrinsicEmitter.cpp:192
   OS << "return (OTable[id/8] & (1 << (id%8))) != 0;\n";
   OS << "#endif\n\n";
 }

There's no new change here. This file should be reverted.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87981/new/

https://reviews.llvm.org/D87981

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D90441: [X86] Add support for vex, vex2, vex3, and evex for MASM

2020-10-29 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/CodeGen/X86/ms-inline-asm-prefix.c:1
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-widows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix 
CHECK
+

Maybe need `// REQUIRES: x86-registered-target`



Comment at: clang/test/CodeGen/X86/ms-inline-asm-prefix.c:1
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-widows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix 
CHECK
+

pengfei wrote:
> Maybe need `// REQUIRES: x86-registered-target`
You may need add att check too since you modified the att code.



Comment at: clang/test/CodeGen/X86/ms-inline-asm-prefix.c:1
+// RUN:%clang_cc1 %s -ferror-limit 0 -triple=x86_64-pc-widows-msvc 
-target-feature +avx512f -target-feature +avx2 -target-feature +avx512vl 
-fasm-blocks -mllvm -x86-asm-syntax=intel -S -o -  | FileCheck %s -check-prefix 
CHECK
+

pengfei wrote:
> pengfei wrote:
> > Maybe need `// REQUIRES: x86-registered-target`
> You may need add att check too since you modified the att code.
Should it be available only with `-fms-compatibility`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90441/new/

https://reviews.llvm.org/D90441

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D101601: [SelectionDAG] Make fast and linearize visible by clang -pre-RA-sched

2021-05-16 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG976a3e5f6141: [SelectionDAG] Make fast and linearize visible 
by clang -pre-RA-sched (authored by TaoPan, committed by pengfei).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D101601/new/

https://reviews.llvm.org/D101601

Files:
  clang/test/CodeGen/pre-ra-sched.c
  llvm/include/llvm/CodeGen/TargetLowering.h
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp


Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -270,6 +270,10 @@
   return createHybridListDAGScheduler(IS, OptLevel);
 if (TLI->getSchedulingPreference() == Sched::VLIW)
   return createVLIWDAGScheduler(IS, OptLevel);
+if (TLI->getSchedulingPreference() == Sched::Fast)
+  return createFastDAGScheduler(IS, OptLevel);
+if (TLI->getSchedulingPreference() == Sched::Linearize)
+  return createDAGLinearizer(IS, OptLevel);
 assert(TLI->getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
 return createILPListDAGScheduler(IS, OptLevel);
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -94,14 +94,16 @@
 
 namespace Sched {
 
-  enum Preference {
-None, // No preference
-Source,   // Follow source order.
-RegPressure,  // Scheduling for lowest register pressure.
-Hybrid,   // Scheduling for both latency and register pressure.
-ILP,  // Scheduling for ILP in low register pressure mode.
-VLIW  // Scheduling for VLIW targets.
-  };
+enum Preference {
+  None,// No preference
+  Source,  // Follow source order.
+  RegPressure, // Scheduling for lowest register pressure.
+  Hybrid,  // Scheduling for both latency and register pressure.
+  ILP, // Scheduling for ILP in low register pressure mode.
+  VLIW,// Scheduling for VLIW targets.
+  Fast,// Fast suboptimal list scheduling
+  Linearize// Linearize DAG, no scheduling
+};
 
 } // end namespace Sched
 
Index: clang/test/CodeGen/pre-ra-sched.c
===
--- /dev/null
+++ clang/test/CodeGen/pre-ra-sched.c
@@ -0,0 +1,4 @@
+// RUN: %clang %s -mllvm -pre-RA-sched=fast -c -o - | FileCheck %s
+// RUN: %clang %s -mllvm -pre-RA-sched=linearize -c -o - | FileCheck %s
+
+// CHECK-NOT: clang (LLVM option parsing): for the --pre-RA-sched option: 
Cannot find option named


Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -270,6 +270,10 @@
   return createHybridListDAGScheduler(IS, OptLevel);
 if (TLI->getSchedulingPreference() == Sched::VLIW)
   return createVLIWDAGScheduler(IS, OptLevel);
+if (TLI->getSchedulingPreference() == Sched::Fast)
+  return createFastDAGScheduler(IS, OptLevel);
+if (TLI->getSchedulingPreference() == Sched::Linearize)
+  return createDAGLinearizer(IS, OptLevel);
 assert(TLI->getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
 return createILPListDAGScheduler(IS, OptLevel);
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -94,14 +94,16 @@
 
 namespace Sched {
 
-  enum Preference {
-None, // No preference
-Source,   // Follow source order.
-RegPressure,  // Scheduling for lowest register pressure.
-Hybrid,   // Scheduling for both latency and register pressure.
-ILP,  // Scheduling for ILP in low register pressure mode.
-VLIW  // Scheduling for VLIW targets.
-  };
+enum Preference {
+  None,// No preference
+  Source,  // Follow source order.
+  RegPressure, // Scheduling for lowest register pressure.
+  Hybrid,  // Scheduling for both latency and register pressure.
+  ILP, // Scheduling for ILP in low register pressure mode.
+  VLIW,// Scheduling for VLIW targets.
+  Fast,// Fast suboptimal list scheduling
+  Linearize// Linearize DAG, no scheduling
+};
 
 } // end namespace Sched
 
Index: clang/test/CodeGen/pre-ra-sched.c
===
--- /dev/null
+++ clang/test/CodeGen/pre-ra-sched.c
@@ -0,0 +1,4 @@
+// RUN: %clang %s -mllvm -pre-RA-sched=fast -c -o - | FileCheck %s
+// RUN: %cla

[PATCH] D102577: TreeTransform.h: make the switch case more beautiful

2021-05-17 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

I think you can add a NFC in the title.




Comment at: clang/lib/Sema/TreeTransform.h:3947-3949
 NestedNameSpecifierLoc NNS,
  QualType ObjectType,
  NamedDecl *FirstQualifierInScope) 
{

There are some formatting issues in this function. It would be better if you
could fix them too. You can use clang-format to help you.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102577/new/

https://reviews.llvm.org/D102577

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102577: NFC: TreeTransform.h: make the function TransformNestedNameSpecifierLoc more beautiful

2021-05-17 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks for improving it.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102577/new/

https://reviews.llvm.org/D102577

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98895: [X86][Draft] Disable long double type for -mno-x87 option

2021-05-18 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/Sema/x86-no-x87.c:48-61
+void assign2() {
+  struct st_long_double st;
+#ifndef NOERROR
+  // expected-error@+2{{long double is not supported on this target}}
+#endif
+  st.ld = 0.42;
+}

These seem to pass with GCC: https://godbolt.org/z/qM4nWhThx


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98895/new/

https://reviews.llvm.org/D98895

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98895: [X86][Draft] Disable long double type for -mno-x87 option

2021-05-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/Sema/x86-no-x87.c:48-61
+void assign2() {
+  struct st_long_double st;
+#ifndef NOERROR
+  // expected-error@+2{{long double is not supported on this target}}
+#endif
+  st.ld = 0.42;
+}

asavonic wrote:
> pengfei wrote:
> > These seems pass with GCC. https://godbolt.org/z/qM4nWhThx
> Right. Assignment of a literal is compiled to just `mov` without any x87 
> instructions, so it is not diagnosed by GCC. On the other hand, assignment of 
> a variable does trigger the error:
> 
> void assign4(double d) {
>   struct st_long_double st;
>   st.ld = d; // error: long double is not supported on this target
> }
> 
> We can update the patch to do the same for some cases, but it does not look 
> very consistent, and makes assumptions on how the code is optimized and 
> compiled.
> 
> GCC has an advantage here, because it emits the diagnostic at a lower level 
> after at least some optimizations are done. For example, the following code 
> does not trigger an error for GCC because of inlining:
> 
> double get_const() {
>   return 0.42;
> }
> void assign5(struct st_long_double *st) {
>   st->ld = get_const();
> }
> 
I see. Another concern is 32-bit targets. @LiuChen3 tested in D100091 and found
that GCC doesn't report an error for 32-bit. Do we need to consider that here?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98895/new/

https://reviews.llvm.org/D98895

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D102577: NFC: TreeTransform.h: make the function TransformNestedNameSpecifierLoc more beautiful

2021-05-21 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe6ec7ab5820f: NFC: TreeTransform.h: make the function 
TransformNestedNameSpecifierLoc more… (authored by zhouyizhou, committed by 
pengfei).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102577/new/

https://reviews.llvm.org/D102577

Files:
  clang/lib/Sema/TreeTransform.h


Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -4031,12 +4031,10 @@
   return Sema::ConditionResult();
 }
 
-template
-NestedNameSpecifierLoc
-TreeTransform::TransformNestedNameSpecifierLoc(
-NestedNameSpecifierLoc NNS,
- QualType ObjectType,
- NamedDecl *FirstQualifierInScope) 
{
+template 
+NestedNameSpecifierLoc TreeTransform::TransformNestedNameSpecifierLoc(
+NestedNameSpecifierLoc NNS, QualType ObjectType,
+NamedDecl *FirstQualifierInScope) {
   SmallVector Qualifiers;
   for (NestedNameSpecifierLoc Qualifier = NNS; Qualifier;
Qualifier = Qualifier.getPrefix())
@@ -4050,28 +4048,26 @@
 switch (QNNS->getKind()) {
 case NestedNameSpecifier::Identifier: {
   Sema::NestedNameSpecInfo IdInfo(QNNS->getAsIdentifier(),
-  Q.getLocalBeginLoc(), Q.getLocalEndLoc(), 
ObjectType);
+  Q.getLocalBeginLoc(), Q.getLocalEndLoc(),
+  ObjectType);
   if (SemaRef.BuildCXXNestedNameSpecifier(/*Scope=*/nullptr, IdInfo, false,
   SS, FirstQualifierInScope, 
false))
 return NestedNameSpecifierLoc();
-}
   break;
+}
 
 case NestedNameSpecifier::Namespace: {
-  NamespaceDecl *NS
-= cast_or_null(
-getDerived().TransformDecl(
-  Q.getLocalBeginLoc(),
-   
QNNS->getAsNamespace()));
+  NamespaceDecl *NS =
+  cast_or_null(getDerived().TransformDecl(
+  Q.getLocalBeginLoc(), QNNS->getAsNamespace()));
   SS.Extend(SemaRef.Context, NS, Q.getLocalBeginLoc(), Q.getLocalEndLoc());
   break;
 }
 
 case NestedNameSpecifier::NamespaceAlias: {
-  NamespaceAliasDecl *Alias
-= cast_or_null(
-  getDerived().TransformDecl(Q.getLocalBeginLoc(),
- QNNS->getAsNamespaceAlias()));
+  NamespaceAliasDecl *Alias =
+  cast_or_null(getDerived().TransformDecl(
+  Q.getLocalBeginLoc(), QNNS->getAsNamespaceAlias()));
   SS.Extend(SemaRef.Context, Alias, Q.getLocalBeginLoc(),
 Q.getLocalEndLoc());
   break;
@@ -4107,7 +4103,7 @@
 if (TL.getType()->isEnumeralType())
   SemaRef.Diag(TL.getBeginLoc(),
diag::warn_cxx98_compat_enum_nested_name_spec);
-SS.Extend(SemaRef.Context, /*FIXME:*/SourceLocation(), TL,
+SS.Extend(SemaRef.Context, /*FIXME:*/ SourceLocation(), TL,
   Q.getLocalEndLoc());
 break;
   }
@@ -4116,7 +4112,7 @@
   TypedefTypeLoc TTL = TL.getAs();
   if (!TTL || !TTL.getTypedefNameDecl()->isInvalidDecl()) {
 SemaRef.Diag(TL.getBeginLoc(), diag::err_nested_name_spec_non_tag)
-  << TL.getType() << SS.getRange();
+<< TL.getType() << SS.getRange();
   }
   return NestedNameSpecifierLoc();
 }


Index: clang/lib/Sema/TreeTransform.h
===
--- clang/lib/Sema/TreeTransform.h
+++ clang/lib/Sema/TreeTransform.h
@@ -4031,12 +4031,10 @@
   return Sema::ConditionResult();
 }
 
-template
-NestedNameSpecifierLoc
-TreeTransform::TransformNestedNameSpecifierLoc(
-NestedNameSpecifierLoc NNS,
- QualType ObjectType,
- NamedDecl *FirstQualifierInScope) {
+template 
+NestedNameSpecifierLoc TreeTransform::TransformNestedNameSpecifierLoc(
+NestedNameSpecifierLoc NNS, QualType ObjectType,
+NamedDecl *FirstQualifierInScope) {
   SmallVector Qualifiers;
   for (NestedNameSpecifierLoc Qualifier = NNS; Qualifier;
Qualifier = Qualifier.getPrefix())
@@ -4050,28 +4048,26 @@
 switch (QNNS->getKind()) {
 case NestedNameSpecifier::Identifier: {
   Sema::NestedNameSpecInfo IdInfo(QNNS->getAsIdentifier(),
-  Q.getLocalBeginLoc(), Q.getLocalEndLoc(), ObjectType);
+  Q.getLocalBeginLoc(), Q.getLocalEndLoc(),
+  ObjectType);
   if (S

[PATCH] D98895: [X86][Draft] Disable long double type for -mno-x87 option

2021-05-25 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/Sema/x86-no-x87.c:2
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-target-feature -x87
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple i686-linux-gnu 
-target-feature -x87
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-windows-msvc 
-target-feature -x87 -DNOERROR

Should i686 expect no error like GCC?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98895/new/

https://reviews.llvm.org/D98895

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98895: [X86][Draft] Disable long double type for -mno-x87 option

2021-05-25 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/Sema/x86-no-x87.c:2
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu 
-target-feature -x87
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple i686-linux-gnu 
-target-feature -x87
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-windows-msvc 
-target-feature -x87 -DNOERROR

asavonic wrote:
> pengfei wrote:
> > Should i686 expect no error like GCC?
> GCC seems to fallback to soft-float for i686 if -mno-80387 is used:
> 
>   long double orig(long double x, long double y)
>   {
> long double z = x + y;
> if (z < 0.0)
>   return z;
> else
>   return 0.0;
>   }
> 
> i686-linux-gnu-gcc-8 -c -S -mno-80387 -O3:
> call  __addxf3@PLT
> [...]
> call  __ltxf2@PLT
> addl  $32, %esp
> testl %eax, %eax
> js.L3
> xorl  %esi, %esi
> xorl  %edi, %edi
> xorl  %ebp, %ebp
>   .L3:
> addl  $12, %esp
> movl  %esi, %eax
> movl  %edi, %edx
> movl  %ebp, %ecx
> popl  %ebx
> popl  %esi
> popl  %edi
> popl  %ebp
> ret
> 
> This looks like a different ABI.
> X87 instructions are not used, so no error is reported.
> 
I found that the 32-bit case is a bit complex.
1. The i686 ABI specifies that floating-point return values must be placed in
%st0, so returning any FP type should be an error without x87.
2. GCC doesn't respect the above ABI rule.
3. FP arguments are passed on the stack, so a function like
`void orig(long double x, long double y, long double *z)` should not be an
error without x87.
x86_64 only uses ST registers when returning FP80.
Considering that case 3 is rare, I think we can ignore it for now, but I
suggest adding a check for float and double on 32-bit targets.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98895/new/

https://reviews.llvm.org/D98895

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98895: [X86][Draft] Disable long double type for -mno-x87 option

2021-05-27 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. But let's wait one or more days to see if others have more comments.




Comment at: clang/lib/Sema/SemaChecking.cpp:4762
+
+  for (ParmVarDecl *Param : FDecl->parameters()) {
+CheckTargetTypeSupport(Context.getTargetInfo(), Param->getType(),

Nit: We can omit the curly braces here.



Comment at: clang/lib/Sema/SemaChecking.cpp:14202
+if (CheckTargetTypeSupport(Context.getTargetInfo(), Param->getType(),
+   Param->getLocation(), /*IsReturnType=*/false)) {
+  HasInvalidParm = true;

ditto


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98895/new/

https://reviews.llvm.org/D98895

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D80344: [Windows SEH]: HARDWARE EXCEPTION HANDLING (MSVC -EHa) - Part 1

2021-06-03 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

We found another crash: https://godbolt.org/z/vcbvddEKr


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80344/new/

https://reviews.llvm.org/D80344

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95421: [NFC] Refine some uninitialized used variables.

2021-01-25 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95421/new/

https://reviews.llvm.org/D95421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-26 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/CodeGen/CGStmt.cpp:2490
+  continue;
+std::string::size_type position1 = Constraints.find("={eax}");
+if (position1 != std::string::npos) {

If `Clobber` is `edx` only, we shouldn't change `"={eax}"` to `"=&{eax}"`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94614: [FPEnv][X86] Platform builtins edition: clang should get from the AST the metadata for constrained FP builtins

2021-01-26 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94614/new/

https://reviews.llvm.org/D94614

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D94466: [X86] merge "={eax}" and "~{eax}" into "=&eax" for MSInlineASM

2021-01-26 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM.




Comment at: clang/lib/CodeGen/CGStmt.cpp:2491
+std::string::size_type position1 =
+Constraints.find("={" + Clobber.str() + "}");
+if (position1 != std::string::npos) {

I'm not sure whether `"={edx}"` ever occurs in practice for MS inline asm, but
there's no harm in handling it here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D94466/new/

https://reviews.llvm.org/D94466

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93179: [X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2021-02-01 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

Hi @RKSimon, what's the status of updating these reduce intrinsics? Is there
any difficulty in always assigning them fast-math flags? I received a bug
report for the previous change, D92940. Can we revert it if the problem is not
easy to fix?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93179/new/

https://reviews.llvm.org/D93179

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93179: [X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2021-02-06 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D93179#2535702 , @pengfei wrote:

> Hi @RKSimon, what's the status of updating these reduce intrinsics? Is there 
> any difficulty for always assigning them fast math flag? I received bug 
> report for the previous change D92940 . Can 
> we revert it if the problem is not easy to fix?

Any thoughts @RKSimon?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93179/new/

https://reviews.llvm.org/D93179

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D96231: [X86] Always assign reassoc flag for intrinsics *reduce_add/mul_ps/pd.

2021-02-07 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: RKSimon, craig.topper, spatel.
pengfei requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

The *reduce_add/mul_ps/pd intrinsics assume that the elements in the vector
are reassociable, so we need to always set the reassoc flag when we call the
_mm_reduce_* intrinsics.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D96231

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512-reduceIntrin.c

Index: clang/test/CodeGen/X86/avx512-reduceIntrin.c
===
--- clang/test/CodeGen/X86/avx512-reduceIntrin.c
+++ clang/test/CodeGen/X86/avx512-reduceIntrin.c
@@ -115,25 +115,25 @@
 
 double test_mm512_reduce_add_pd(__m512d __W){
 // CHECK-LABEL: @test_mm512_reduce_add_pd(
-// CHECK:call double @llvm.vector.reduce.fadd.v8f64(double 0.00e+00, <8 x double> %{{.*}})
+// CHECK:call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.00e+00, <8 x double> %{{.*}})
   return _mm512_reduce_add_pd(__W); 
 }
 
 double test_mm512_reduce_mul_pd(__m512d __W){
 // CHECK-LABEL: @test_mm512_reduce_mul_pd(
-// CHECK:call double @llvm.vector.reduce.fmul.v8f64(double 1.00e+00, <8 x double> %{{.*}})
+// CHECK:call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.00e+00, <8 x double> %{{.*}})
   return _mm512_reduce_mul_pd(__W); 
 }
 
 float test_mm512_reduce_add_ps(__m512 __W){
 // CHECK-LABEL: @test_mm512_reduce_add_ps(
-// CHECK:call float @llvm.vector.reduce.fadd.v16f32(float 0.00e+00, <16 x float> %{{.*}})
+// CHECK:call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.00e+00, <16 x float> %{{.*}})
   return _mm512_reduce_add_ps(__W); 
 }
 
 float test_mm512_reduce_mul_ps(__m512 __W){
 // CHECK-LABEL: @test_mm512_reduce_mul_ps(
-// CHECK:call float @llvm.vector.reduce.fmul.v16f32(float 1.00e+00, <16 x float> %{{.*}})
+// CHECK:call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.00e+00, <16 x float> %{{.*}})
   return _mm512_reduce_mul_ps(__W); 
 }
 
@@ -141,7 +141,7 @@
 // CHECK-LABEL: @test_mm512_mask_reduce_add_pd(
 // CHECK:bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:call double @llvm.vector.reduce.fadd.v8f64(double 0.00e+00, <8 x double> %{{.*}})
+// CHECK:call reassoc double @llvm.vector.reduce.fadd.v8f64(double 0.00e+00, <8 x double> %{{.*}})
   return _mm512_mask_reduce_add_pd(__M, __W); 
 }
 
@@ -149,7 +149,7 @@
 // CHECK-LABEL: @test_mm512_mask_reduce_mul_pd(
 // CHECK:bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:call double @llvm.vector.reduce.fmul.v8f64(double 1.00e+00, <8 x double> %{{.*}})
+// CHECK:call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.00e+00, <8 x double> %{{.*}})
   return _mm512_mask_reduce_mul_pd(__M, __W); 
 }
 
@@ -157,7 +157,7 @@
 // CHECK-LABEL: @test_mm512_mask_reduce_add_ps(
 // CHECK:bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
-// CHECK:call float @llvm.vector.reduce.fadd.v16f32(float 0.00e+00, <16 x float> %{{.*}})
+// CHECK:call reassoc float @llvm.vector.reduce.fadd.v16f32(float 0.00e+00, <16 x float> %{{.*}})
   return _mm512_mask_reduce_add_ps(__M, __W); 
 }
 
@@ -165,6 +165,6 @@
 // CHECK-LABEL: @test_mm512_mask_reduce_mul_ps(
 // CHECK:bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> %{{.*}}
-// CHECK:call float @llvm.vector.reduce.fmul.v16f32(float 1.00e+00, <16 x float> %{{.*}})
+// CHECK:call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.00e+00, <16 x float> %{{.*}})
   return _mm512_mask_reduce_mul_ps(__M, __W); 
 }
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9300,6 +9300,9 @@
  * computations. In vector-reduction arithmetic, the evaluation off is
  * independent of the order of the input elements of V.
 
+ * For floating points type, we always assume the elements are reassociable even
+ * if -fast-math is off.
+
  * Used bisection method. At each step, we partition the vector with previous
  * step in half, and the operation is performed on its two halves.
  * This takes log2(n) steps where n is the number of elements in the vector.
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13826,12 +13826,14 @@
   case X86::BI__builtin_ia32_reduce_fadd_ps512: {
 Function *F =
 CGM.getIntrinsic(Intrinsic::vect

[PATCH] D93179: [X86] Convert fmin/fmax _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2021-02-07 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D93179#2547306 , @RKSimon wrote:

> @pengfei I'm sorry I haven't gotten back to looking at this yet - it makes 
> sense to create a patch to revert the fadd/fmul reduction changes for 
> trunk/12.x.

Thanks @RKSimon. I gave it a try and found that we may not need to revert it.
See D96231.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93179/new/

https://reviews.llvm.org/D93179

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109607: [X86] Refactor GetSSETypeAtOffset to fix pr51813

2021-09-16 Thread Pengfei Wang via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe9e1d4751b54: [X86] Refactor GetSSETypeAtOffset to fix 
pr51813 (authored by pengfei).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109607/new/

https://reviews.llvm.org/D109607

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/X86/avx512fp16-abi.c

Index: clang/test/CodeGen/X86/avx512fp16-abi.c
===
--- clang/test/CodeGen/X86/avx512fp16-abi.c
+++ clang/test/CodeGen/X86/avx512fp16-abi.c
@@ -1,11 +1,12 @@
-// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +avx512fp16 < %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +avx512fp16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm  -target-feature +avx512fp16 -x c++ -std=c++11 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CPP
 
 struct half1 {
   _Float16 a;
 };
 
 struct half1 h1(_Float16 a) {
-  // CHECK: define{{.*}}half @h1
+  // CHECK: define{{.*}}half @
   struct half1 x;
   x.a = a;
   return x;
@@ -17,7 +18,7 @@
 };
 
 struct half2 h2(_Float16 a, _Float16 b) {
-  // CHECK: define{{.*}}<2 x half> @h2
+  // CHECK: define{{.*}}<2 x half> @
   struct half2 x;
   x.a = a;
   x.b = b;
@@ -31,7 +32,7 @@
 };
 
 struct half3 h3(_Float16 a, _Float16 b, _Float16 c) {
-  // CHECK: define{{.*}}<4 x half> @h3
+  // CHECK: define{{.*}}<4 x half> @
   struct half3 x;
   x.a = a;
   x.b = b;
@@ -47,7 +48,7 @@
 };
 
 struct half4 h4(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
-  // CHECK: define{{.*}}<4 x half> @h4
+  // CHECK: define{{.*}}<4 x half> @
   struct half4 x;
   x.a = a;
   x.b = b;
@@ -62,7 +63,7 @@
 };
 
 struct floathalf fh(float a, _Float16 b) {
-  // CHECK: define{{.*}}<4 x half> @fh
+  // CHECK: define{{.*}}<4 x half> @
   struct floathalf x;
   x.a = a;
   x.b = b;
@@ -76,7 +77,7 @@
 };
 
 struct floathalf2 fh2(float a, _Float16 b, _Float16 c) {
-  // CHECK: define{{.*}}<4 x half> @fh2
+  // CHECK: define{{.*}}<4 x half> @
   struct floathalf2 x;
   x.a = a;
   x.b = b;
@@ -90,7 +91,7 @@
 };
 
 struct halffloat hf(_Float16 a, float b) {
-  // CHECK: define{{.*}}<4 x half> @hf
+  // CHECK: define{{.*}}<4 x half> @
   struct halffloat x;
   x.a = a;
   x.b = b;
@@ -104,7 +105,7 @@
 };
 
 struct half2float h2f(_Float16 a, _Float16 b, float c) {
-  // CHECK: define{{.*}}<4 x half> @h2f
+  // CHECK: define{{.*}}<4 x half> @
   struct half2float x;
   x.a = a;
   x.b = b;
@@ -120,7 +121,7 @@
 };
 
 struct floathalf3 fh3(float a, _Float16 b, _Float16 c, _Float16 d) {
-  // CHECK: define{{.*}}{ <4 x half>, half } @fh3
+  // CHECK: define{{.*}}{ <4 x half>, half } @
   struct floathalf3 x;
   x.a = a;
   x.b = b;
@@ -138,7 +139,7 @@
 };
 
 struct half5 h5(_Float16 a, _Float16 b, _Float16 c, _Float16 d, _Float16 e) {
-  // CHECK: define{{.*}}{ <4 x half>, half } @h5
+  // CHECK: define{{.*}}{ <4 x half>, half } @
   struct half5 x;
   x.a = a;
   x.b = b;
@@ -147,3 +148,52 @@
   x.e = e;
   return x;
 }
+
+struct float2 {
+  struct {} s;
+  float a;
+  float b;
+};
+
+float pr51813(struct float2 s) {
+  // CHECK-C: define{{.*}} @pr51813(<2 x float>
+  // CHECK-CPP: define{{.*}} @_Z7pr518136float2(double {{.*}}, float
+  return s.a;
+}
+
+struct float3 {
+  float a;
+  struct {} s;
+  float b;
+};
+
+float pr51813_2(struct float3 s) {
+  // CHECK-C: define{{.*}} @pr51813_2(<2 x float>
+  // CHECK-CPP: define{{.*}} @_Z9pr51813_26float3(double {{.*}}, float
+  return s.a;
+}
+
+struct shalf2 {
+  struct {} s;
+  _Float16 a;
+  _Float16 b;
+};
+
+_Float16 sf2(struct shalf2 s) {
+  // CHECK-C: define{{.*}} @sf2(<2 x half>
+  // CHECK-CPP: define{{.*}} @_Z3sf26shalf2(double {{.*}}
+  return s.a;
+};
+
+struct halfs2 {
+  _Float16 a;
+  struct {} s1;
+  _Float16 b;
+  struct {} s2;
+};
+
+_Float16 fs2(struct shalf2 s) {
+  // CHECK-C: define{{.*}} @fs2(<2 x half>
+  // CHECK-CPP: define{{.*}} @_Z3fs26shalf2(double {{.*}}
+  return s.a;
+};
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -3407,52 +3407,18 @@
   return false;
 }
 
-/// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a
-/// float member at the specified offset.  For example, {int,{float}} has a
-/// float at offset 4.  It is conservatively correct for this routine to return
-/// false.
-static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset,
-  const llvm::DataLayout &TD) {
-  // Base case if we find a float.
-  if (IROffset == 0 && IRType->isFloatTy())
-return true;
-
-  // If this is a struct, recurse into the field at the specified offset.
-  if (llvm::StructType *STy = dyn_cast(IRType)) {
-const llvm::StructLayout *SL = TD.getStructLay

[PATCH] D109607: [X86] Refactor GetSSETypeAtOffset to fix pr51813

2021-09-18 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D109607#3007860 , @Meinersbur 
wrote:

> This patch seem to have broken `GCC-C-execute-pr44575` from the 
> llvm-test-suite. See http://meinersbur.de:8011/#/builders/76/builds/761 (this 
> builder compiles with Polly, but it also crashes without Polly)

Thanks @Meinersbur for reporting this. Do you have a small reproducer or the
crash log? I didn't find any details about the crash on the bot.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109607/new/

https://reviews.llvm.org/D109607

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110037: [X86] Always check the size of SourceTy before getting the next type

2021-09-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: LuoYuanke, Meinersbur, craig.topper, RKSimon.
pengfei requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

D109607 results in a regression in llvm-test-suite.
The reason is that we didn't check the size of SourceTy, so we
return the wrong SSE type when SourceTy is overlapped.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D110037

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/X86/va-arg-sse.c


Index: clang/test/CodeGen/X86/va-arg-sse.c
===
--- clang/test/CodeGen/X86/va-arg-sse.c
+++ clang/test/CodeGen/X86/va-arg-sse.c
@@ -17,23 +17,30 @@
 // CHECK-NEXT:[[FITS_IN_FP:%.*]] = icmp ult i32 [[FP_OFFSET]], 145
 // CHECK-NEXT:br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
 // CHECK:   vaarg.in_reg:
-// CHECK-NEXT:[[TMP1:%.*]] = add nuw nsw i32 [[FP_OFFSET]], 32
-// CHECK-NEXT:store i32 [[TMP1]], i32* [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], [1 x %struct.__va_list_tag]* [[AP]], i64 0, i64 0, i32 3
+// CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP1]], align 16
+// CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[FP_OFFSET]] to i64
+// CHECK-NEXT:[[TMP3:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i64 
[[TMP2]]
+// CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 
16
+// CHECK-NEXT:[[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
+// CHECK-NEXT:[[TMP6:%.*]] = load float, float* [[TMP5]], align 16
+// CHECK-NEXT:[[TMP7:%.*]] = add nuw nsw i32 [[FP_OFFSET]], 32
+// CHECK-NEXT:store i32 [[TMP7]], i32* [[FP_OFFSET_P]], align 4
 // CHECK-NEXT:br label [[VAARG_END:%.*]]
 // CHECK:   vaarg.in_mem:
 // CHECK-NEXT:[[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], [1 x %struct.__va_list_tag]* [[AP]], i64 0, i64 0, i32 2
 // CHECK-NEXT:[[OVERFLOW_ARG_AREA:%.*]] = load i8*, i8** 
[[OVERFLOW_ARG_AREA_P]], align 8
 // CHECK-NEXT:[[DOTSROA_GEP:%.*]] = getelementptr inbounds i8, i8* 
[[OVERFLOW_ARG_AREA]], i64 8
-// CHECK-NEXT:[[TMP2:%.*]] = bitcast i8* [[DOTSROA_GEP]] to float*
+// CHECK-NEXT:[[TMP8:%.*]] = bitcast i8* [[DOTSROA_GEP]] to float*
 // CHECK-NEXT:[[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, i8* 
[[OVERFLOW_ARG_AREA]], i64 16
 // CHECK-NEXT:store i8* [[OVERFLOW_ARG_AREA_NEXT]], i8** 
[[OVERFLOW_ARG_AREA_P]], align 8
-// CHECK-NEXT:[[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM:%.*]] 
= load float, float* [[TMP2]], align 4, !tbaa.struct !2
+// CHECK-NEXT:[[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM:%.*]] 
= load float, float* [[TMP8]], align 4, !tbaa.struct !2
 // CHECK-NEXT:br label [[VAARG_END]]
 // CHECK:   vaarg.end:
-// CHECK-NEXT:[[VAARG_ADDR_SROA_PHI_SROA_SPECULATED:%.*]] = phi float [ 
undef, [[VAARG_IN_REG]] ], [ 
[[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:[[VAARG_ADDR_SROA_PHI_SROA_SPECULATED:%.*]] = phi float [ 
[[TMP6]], [[VAARG_IN_REG]] ], [ 
[[VAARG_ADDR_SROA_PHI_SROA_SPECULATE_LOAD_VAARG_IN_MEM]], [[VAARG_IN_MEM]] ]
 // CHECK-NEXT:call void @llvm.va_end(i8* nonnull [[TMP0]])
-// CHECK-NEXT:[[TMP3:%.*]] = load float, float* getelementptr inbounds ([5 
x %struct.S], [5 x %struct.S]* @a, i64 0, i64 2, i32 0, i64 2), align 16, !tbaa 
[[TBAA6:![0-9]+]]
-// CHECK-NEXT:[[CMP:%.*]] = fcmp oeq float [[TMP3]], 
[[VAARG_ADDR_SROA_PHI_SROA_SPECULATED]]
+// CHECK-NEXT:[[TMP9:%.*]] = load float, float* getelementptr inbounds ([5 
x %struct.S], [5 x %struct.S]* @a, i64 0, i64 2, i32 0, i64 2), align 16, !tbaa 
[[TBAA6:![0-9]+]]
+// CHECK-NEXT:[[CMP:%.*]] = fcmp oeq float [[TMP9]], 
[[VAARG_ADDR_SROA_PHI_SROA_SPECULATED]]
 // CHECK-NEXT:[[RETVAL_0:%.*]] = zext i1 [[CMP]] to i32
 // CHECK-NEXT:call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull 
[[TMP0]]) #[[ATTR3]]
 // CHECK-NEXT:ret i32 [[RETVAL_0]]
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -3438,17 +3438,21 @@
 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
   const llvm::DataLayout &TD = getDataLayout();
+  unsigned SourceSize =
+  (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || T0->isDoubleTy())
 return llvm::Type::getDoubleTy(getVMContext());
 
   // Get the adjacent FP type.
-  llvm::Type *T1 =
-  getFPTypeAtOffset(IRType, IROffset + TD.getTypeAllocSize(T0), TD);
+  llvm::Type *T1 = nullptr;
+  unsigned T0Size = TD.getTypeAllocSize(

[PATCH] D109607: [X86] Refactor GetSSETypeAtOffset to fix pr51813

2021-09-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D109607#3007860 , @Meinersbur 
wrote:

> This patch seems to have broken `GCC-C-execute-pr44575` from the 
> llvm-test-suite. See http://meinersbur.de:8011/#/builders/76/builds/761 (this 
> builder compiles with Polly, but it also crashes without Polly)

@Meinersbur, sorry for the late response. I just managed to reproduce the 
failure. I created D110037 to try to fix this problem. The test passed 
locally.
By the way, does this bot send notifications to authors when it fails? I didn't 
receive a failure notice, so I wasn't aware of it at first.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109607/new/

https://reviews.llvm.org/D109607

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110037: [X86] Always check the size of SourceTy before getting the next type

2021-09-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

I'm wondering whether a test case is required for this patch. Reasons:

1. We have a test in llvm-test-suite that covers this, and the test case is 
just a snippet of it;
2. The test case cannot reflect the direct effect of this change;
3. There are many variables in the IR which may easily be affected by unrelated 
changes, which is annoying to others.

What do you think?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110037/new/

https://reviews.llvm.org/D110037

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D110037: [X86] Always check the size of SourceTy before getting the next type

2021-09-20 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 373581.
pengfei marked 2 inline comments as done.
pengfei added a comment.

Address review comments.

Thanks Roman and Michael.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110037/new/

https://reviews.llvm.org/D110037

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/X86/va-arg-sse.c


Index: clang/test/CodeGen/X86/va-arg-sse.c
===
--- clang/test/CodeGen/X86/va-arg-sse.c
+++ clang/test/CodeGen/X86/va-arg-sse.c
@@ -34,16 +34,16 @@
 // CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP0]], align 16
 // CHECK-NEXT:[[TMP1:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 
[[FP_OFFSET]]
 // CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 
16
-// CHECK-NEXT:[[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, 
<2 x float> }*
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, 
float }*
 // CHECK-NEXT:[[TMP4:%.*]] = bitcast i8* [[TMP1]] to <2 x float>*
 // CHECK-NEXT:[[TMP5:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], 
align 16
-// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds { <2 x float>, <2 x 
float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds { <2 x float>, float 
}, { <2 x float>, float }* [[TMP3]], i32 0, i32 0
 // CHECK-NEXT:store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
-// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP2]] to <2 x float>*
-// CHECK-NEXT:[[TMP8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], 
align 16
-// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds { <2 x float>, <2 x 
float> }, { <2 x float>, <2 x float> }* [[TMP3]], i32 0, i32 1
-// CHECK-NEXT:store <2 x float> [[TMP8]], <2 x float>* [[TMP9]], align 4
-// CHECK-NEXT:[[TMP10:%.*]] = bitcast { <2 x float>, <2 x float> }* 
[[TMP3]] to %struct.S*
+// CHECK-NEXT:[[TMP7:%.*]] = bitcast i8* [[TMP2]] to float*
+// CHECK-NEXT:[[TMP8:%.*]] = load float, float* [[TMP7]], align 16
+// CHECK-NEXT:[[TMP9:%.*]] = getelementptr inbounds { <2 x float>, float 
}, { <2 x float>, float }* [[TMP3]], i32 0, i32 1
+// CHECK-NEXT:store float [[TMP8]], float* [[TMP9]], align 4
+// CHECK-NEXT:[[TMP10:%.*]] = bitcast { <2 x float>, float }* [[TMP3]] to 
%struct.S*
 // CHECK-NEXT:[[TMP11:%.*]] = add i32 [[FP_OFFSET]], 32
 // CHECK-NEXT:store i32 [[TMP11]], i32* [[FP_OFFSET_P]], align 4
 // CHECK-NEXT:br label [[VAARG_END:%.*]]
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -3438,17 +3438,21 @@
 GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
   const llvm::DataLayout &TD = getDataLayout();
+  unsigned SourceSize =
+  (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
   llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
   if (!T0 || T0->isDoubleTy())
 return llvm::Type::getDoubleTy(getVMContext());
 
   // Get the adjacent FP type.
-  llvm::Type *T1 =
-  getFPTypeAtOffset(IRType, IROffset + TD.getTypeAllocSize(T0), TD);
+  llvm::Type *T1 = nullptr;
+  unsigned T0Size = TD.getTypeAllocSize(T0);
+  if (SourceSize > T0Size)
+  T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
   if (T1 == nullptr) {
 // Check if IRType is a half + float. float type will be in IROffset+4 due
 // to its alignment.
-if (T0->isHalfTy())
+if (T0->isHalfTy() && SourceSize > 4)
   T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
 // If we can't get a second FP type, return a simple half or float.
 // avx512fp16-abi.c:pr51813_2 shows it works to return float for
@@ -3461,7 +3465,9 @@
 return llvm::FixedVectorType::get(T0, 2);
 
   if (T0->isHalfTy() && T1->isHalfTy()) {
-llvm::Type *T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
+llvm::Type *T2 = nullptr;
+if (SourceSize > 4)
+  T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
 if (T2 == nullptr)
   return llvm::FixedVectorType::get(T0, 2);
 return llvm::FixedVectorType::get(T0, 4);


Index: clang/test/CodeGen/X86/va-arg-sse.c
===
--- clang/test/CodeGen/X86/va-arg-sse.c
+++ clang/test/CodeGen/X86/va-arg-sse.c
@@ -34,16 +34,16 @@
 // CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load i8*, i8** [[TMP0]], align 16
 // CHECK-NEXT:[[TMP1:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 [[FP_OFFSET]]
 // CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 16
-// CHECK-NEXT:[[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, <2 x float> }*
+// CHECK-NEXT:[[TMP3:%.*]] = bitcast %struct.S* [[TMP]] to { <2 x float>, float }*
 // C

[PATCH] D110037: [X86] Always check the size of SourceTy before getting the next type

2021-09-20 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/test/CodeGen/X86/va-arg-sse.c:2
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1 %s -O2 -emit-llvm -o - -triple x86_64-unknown-unknown | 
FileCheck %s
 

Meinersbur wrote:
> lebedev.ri wrote:
> > Please don't use `-O*` in clang irgen tests.
> > This should *only* test what IR is produced by clang itself.
> I agree here, testing `-O*` output will break easily with any unrelated 
> change in LLVM.
You are right. The test case looks clearer now.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110037/new/

https://reviews.llvm.org/D110037

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109607: [X86] Refactor GetSSETypeAtOffset to fix pr51813

2021-09-20 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D109607#3008486 , @Meinersbur 
wrote:

> However, my other one that is connected to lab.llvm.org has failed as well 
> and should have sent an email: 
> https://lab.llvm.org/buildbot/#/builders/102/builds/2722. Unfortunately it is 
> slow and packing to many commits together, which I am trying to improve: 
> D110048 

I didn't receive it either. I suspected my mailbox at first, but haven't had 
any luck so far. :(

> Independent of that, it seems no other builder is running the test-suite on 
> x86_64 (there are armv7, s390x and ppc64le ones)

Thanks for your information.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109607/new/

https://reviews.llvm.org/D109607

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D109607: [X86] Refactor GetSSETypeAtOffset to fix pr51813

2021-09-20 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added a comment.

In D109607#3009412 , @Meinersbur 
wrote:

> In D109607#3009377 , @pengfei wrote:
>
>> In D109607#3008486 , @Meinersbur 
>> wrote:
>>
>>> However, my other one that is connected to lab.llvm.org has failed as well 
>>> and should have sent an email: 
>>> https://lab.llvm.org/buildbot/#/builders/102/builds/2722. Unfortunately it 
>>> is slow and packing too many commits together, which I am trying to improve: 
>>> D110048 
>>
>> I didn't receive it either. I once suspected my mailbox but haven't had any 
>> fortune for now. :(
>
> Might be because the master somehow thinks the job before the one that is 
> breaking is still building, i.e. it cannot identify whether the failure is new.

Got it, thanks Michael.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109607/new/

https://reviews.llvm.org/D109607

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   3   >