@@ -1035,10 +1077,11 @@ _mm_srli_pi32(__m64 __m, int __count)
/// \param __count
///A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static _
@@ -150,8 +150,8 @@ TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "ncV:64:",
"mmx,sse")
TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "ncV:64:", "mmx,sse")
TARGET_BUILTIN(__builtin_ia32_pshufw, "V
@@ -614,12 +623,15 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
///1: Clear the corresponding byte in the destination. \n
///0: Copy the selected source byte to the corresponding byte in the
///destination. \n
-///Bits [3:0] select the source byte to be copied.
@@ -811,10 +843,11 @@ _mm_slli_pi32(__m64 __m, int __count)
///A 64-bit integer vector interpreted as a single 64-bit integer.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-s
@@ -21,10 +21,29 @@ typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
+/* Unsigned types */
+typedef unsigned long long __v1du __attribute__ ((__vector_size
@@ -177,7 +175,10 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///value.
#define _mm_alignr_pi8(a, b, n) \
- ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
+ ((__m64)__builtin_sh
@@ -124,10 +143,11 @@ _mm_cvtm64_si64(__m64 __m)
///written to the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
///values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
+static __inline__ __m64 __DEFAULT_FN_ATTRS_S
@@ -0,0 +1,29 @@
+USE_XMM=
phoebewang wrote:
What are these tests used for? Was your local tool uploaded unintentionally, or do
you want them to be reviewed as well?
https://github.com/llvm/llvm-project/pull/96540
___
cfe-comm
@@ -494,10 +520,10 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
///A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
///both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS
@@ -2502,10 +2509,25 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
///A pointer to a 64-bit memory location that will receive the
conditionally
///copied integer values. The address of the memory location does not have
///to be aligned.
-static __inline__ void __DEFAULT_
@@ -21,10 +21,29 @@ typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
+/* Unsigned types */
+typedef unsigned long long __v1du __attribute__ ((__vector_size
@@ -0,0 +1,29 @@
+USE_XMM=
phoebewang wrote:
How about we move these old implementations into a separate file (or leave
them where they are if you like) and rename them to _dept, so that we don't
rely on old compilers? We can then remove them as well as th
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/102592
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,34 @@
+/*=== avx10_2copyintrin.h - AVX10.2 Copy intrinsics ---===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C,
"vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16",
X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd %xmm2, %xmm1
phoebewang wrote:
Missing memory tests.
https://github.com/llv
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef
Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits
(MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarg
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/111435
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/111001
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -6,13 +6,23 @@
#include
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+ return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
phoebewang wrote:
Can we templatize it to
@@ -6,13 +6,23 @@
#include
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+constexpr bool match_m128(__m128 v, float x, float y, float z, float w) {
+ return v[0] == x && v[1] == y && v[2] == z && v[3] == w;
+}
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
https://github.com/phoebewang edited
https://github.com/llvm/llvm-project/pull/112578
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
I like the idea of putting them together, but I'm not an expert in C++. It'd be good
if someone familiar with C++ took another look.
https://github.com/llvm/llvm-project/pull/112578
___
cfe-commits m
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding
-mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-enc
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// SM4(EVEX)
+multiclass avx10_sm4_base {
+ // SM4_Base is
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
--check-prefixes=ATT
phoebewang wrote:
The file name is mistaken with sm4-evex-64.txt
https://github.com/llvm/llvm-project/pull/113402
_
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// SM4(EVEX)
+multiclass avx10_sm4_base {
+ // SM4_Base is
@@ -0,0 +1,224 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck
%s
phoebewang wrote:
The same applies below.
https://github.com/llvm/llvm-project/pull/113402
___
cfe-commits mailing list
cf
@@ -196,6 +196,8 @@ Changes to the X86 Backend
* Support ISA of `AVX10.2-256` and `AVX10.2-512`.
+* Support ISA of `SM4(EVEX)`.
phoebewang wrote:
We should mainly add this to the Clang release notes.
https://github.com/llvm/llvm-project/pull/113402
__
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+Intrinsic<[llvm_v16i32_ty], [llvm_v1
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+Intrinsic<[llvm_v16i32_ty], [llvm_v1
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
#include
#include
#include
+#if (defined(__SM4__))
phoebewang wrote:
Checking the macro does not always work, e.g., with function multi-versioning. It's
fine to remove it.
https://github.com/llvm/llvm-
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s
--check-prefixes=ATT
phoebewang wrote:
ditto.
https://github.com/llvm/llvm-project/pull/113402
___
cfe-commits mailing lis
@@ -0,0 +1,98 @@
+/*=== movrs_avx10_2_512intrin.h - AVX512MOVRS intrinsics ---===
phoebewang wrote:
`AVX512MOVRS` -> `AVX10.2-512-MOVRS`
https://github.com/llvm/llvm-project/pull/113274
___
cfe-commits
@@ -7572,3 +7572,42 @@ def int_x86_avx10_vfnmsub231nepbf16128 :
ClangBuiltin<"__builtin_ia32_vfnmsub231
DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty,
llvm_v8bf16_ty, llvm_v8bf16_ty ],
[IntrNoMem]>;
}
+
+let TargetPrefix = "x86"
@@ -261,6 +261,7 @@ X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256",
36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256",0)
X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512",
@@ -0,0 +1,174 @@
+/*===-- movrs_avx10_2intrin.h - AVX512MOVRS intrinsics -===
phoebewang wrote:
AVX10.2-MOVRS
https://github.com/llvm/llvm-project/pull/113274
___
cfe-commits mailing list
cfe-commi
@@ -568,6 +568,131 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.setDesc(TII->get(Opc));
return true;
}
+ // TILEPAIRLOAD is just for TILEPair spill, we don't have corresponding
+ // AMX instruction to support it. So, split it to 2 load instructions:
@@ -34,9 +34,31 @@ class ShapeT {
if (MRI)
deduceImm(MRI);
}
+ // When ShapeT has mult shapes, we only use Shapes (never use Row and Col)
+ // and ImmShapes. Due to the most case is only one shape (just simply use
+ // Shape.Row or Shape.Col), so here we don't me
@@ -2339,6 +2339,19 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512,
"V32yV32yV32yV32y", "ncV:512:", "a
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:",
"avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:",
"avx1
@@ -80,28 +80,41 @@ INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
"Fast Tile Register Configure", false, false)
-static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
+static unsigned g
@@ -0,0 +1,83 @@
+/*===- amxfp8intrin.h - AMX intrinsics -*- C++
-*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apa
@@ -267,3 +267,42 @@ let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
}
} // SchedRW = [WriteSystem]
}
+
+// AMX-FP8
+let Predicates = [HasAMXFP8, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+let Constraints = "$src1 = $dst" in {
+ class AMX_FP8_BASE
@@ -267,3 +267,42 @@ let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
}
} // SchedRW = [WriteSystem]
}
+
+// AMX-FP8
+let Predicates = [HasAMXFP8, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+let Constraints = "$src1 = $dst" in {
+ class AMX_FP8_BASE
@@ -0,0 +1,24 @@
+/*===-- amxfp8intrin.h - AMX intrinsics -*- C++ -*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,24 @@
+/*===-- amxfp8intrin.h - AMX intrinsics -*- C++ -*===
phoebewang wrote:
Align it to 80 columns.
https://github.com/llvm/llvm-project/pull/113850
___
cfe-commits mailing list
@@ -270,6 +270,9 @@ def FeatureAMXFP16 : SubtargetFeature<"amx-fp16",
"HasAMXFP16", "true",
def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX",
"true",
"Support AMX-COMPLEX instructions",
@@ -37503,6 +37503,38 @@
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
+ case X86::PTDPBF8PS:
+ case X86::PTDPBHF8PS:
+ case X86::PTDPHBF8PS:
+ case X86::PTDPHF8PS: {
---
@@ -1876,6 +1876,10 @@ const StringMap sys::getHostCPUFeatures() {
MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
+ bool HasLeaf1E =
+ MaxLevel >= 0x1e && !getX86CpuIDAndInfo(
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
--check-prefixes=ATT
phoebewang wrote:
Remove `x86-64-` from file name. The same below.
https://github.com/llvm/llvm-project/pull/113850
https://github.com/phoebewang created
https://github.com/llvm/llvm-project/pull/114070
None
>From 587d0105e7724db0f35fc5c8179519fa6319e5c8 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe"
Date: Tue, 29 Oct 2024 22:29:25 +0800
Subject: [PATCH] [X86][AMX] Support AMX-AVX512
---
clang/docs/Release
@@ -5994,6 +5994,23 @@ let TargetPrefix = "x86" in {
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
+
+ def int_x86_tdpbf8ps : ClangBuiltin<"__builtin_ia32_
@@ -0,0 +1,92 @@
+/*===- amxfp8intrin.h - AMX intrinsics -*- C++
-*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apa
@@ -0,0 +1,92 @@
+/*===- amxfp8intrin.h - AMX intrinsics -*- C++
-*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apa
@@ -0,0 +1,83 @@
+/*===- amxfp8intrin.h - AMX intrinsics -*- C++
-*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apa
phoebewang wrote:
> Looks like this causes a significant compile-time regression, but only for
> ReleaseLTO-g:
> https://llvm-compile-time-tracker.com/compare.php?from=1e19f0f9d92b5e9c43d53893e387341835d3d96b&to=c72a751dabff4260dcc309e48008941d51b31d21&stat=instructions:u
>
> I wouldn't really
https://github.com/phoebewang edited
https://github.com/llvm/llvm-project/pull/113850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM, need to add new api support as a follow up.
https://github.com/llvm/llvm-project/pull/113850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/
@@ -0,0 +1,92 @@
+/*===- amxfp8intrin.h - AMX intrinsics -*- C++
-*===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apa
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/113850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/113871
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang created
https://github.com/llvm/llvm-project/pull/113674
None
>From e2ca931bb1efa3d9c6cc4ae9af94a704561ed870 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe"
Date: Fri, 25 Oct 2024 18:40:45 +0800
Subject: [PATCH] [test] Avoid writing to a potentially write-protected
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/113674
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration
-triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm
-verify
phoebewang wrote:
Thanks for the report, fixed by
https://github.com/llvm/llvm-proje
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration
-triple=i386-unknown-unknown -target-feature +movrs -target-feature
+avx10.2-256 -emit-llvm -verify
phoebewang wrote:
Done.
https://github.com/llvm/llvm-project/pull/1132
@@ -273,6 +273,9 @@ def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex",
"HasAMXCOMPLEX", "true",
def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true",
"Support AMX-FP8 instructions",
[
@@ -370,6 +370,71 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
}
} // HasAMXTILE, HasAMXTRANSPOSE
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW =
[WriteSystem] in {
+ def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+
@@ -1880,6 +1880,7 @@ const StringMap sys::getHostCPUFeatures() {
!getX86CpuIDAndInfoEx(0x1e, 0x1, &EAX, &EBX, &ECX, &EDX);
Features["amx-fp8"] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave;
Features["amx-transpose"] = HasLeaf1E && ((EAX >> 5) & 1) && Has
@@ -370,6 +370,71 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
}
} // HasAMXTILE, HasAMXTRANSPOSE
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW =
[WriteSystem] in {
+ def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+
@@ -0,0 +1,201 @@
+/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics -*- C++
+ * -*-===
phoebewang wrote:
Adjust this to fit on one line.
https://github.com/llvm/llvm-project/pull/115151
___
cfe-commits m
@@ -656,6 +656,11 @@ _storebe_i64(void * __P, long long __D) {
#include
#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__)
+#include
+#include
phoebewang wrote:
This needs to be guarded with
```
#if !defined(__SCE__) || __has
@@ -370,6 +370,71 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
}
} // HasAMXTILE, HasAMXTRANSPOSE
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW =
[WriteSystem] in {
+ def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+
@@ -0,0 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
+// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 \
+// RUN: -target-feature +am
@@ -5305,6 +5339,13 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, CNode);
return;
}
+case Intrinsic::x86_t2rpntlvwz0rs:
+case Intrinsic::x86_t2rpntlvwz0rst1:
+case Intrinsic::x86_t2rpntlvwz1rs:
+case Intrinsic::x86_t2rpntlvwz1
phoebewang wrote:
Thanks all! Fixed in https://github.com/llvm/llvm-project/pull/115581
https://github.com/llvm/llvm-project/pull/114070
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/115581
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -151,6 +151,7 @@ set(x86_files
amxfp8intrin.h
amxintrin.h
amxtransposeintrin.h
+ amxmovrsintrin.h
phoebewang wrote:
alphabetical order
https://github.com/llvm/llvm-project/pull/115151
___
cfe-commits maili
@@ -17,6 +17,9 @@
#define __DEFAULT_FN_ATTRS_TRANSPOSE
\
__attribute__((__always_inline__, __nodebug__, __target__("amx-transpose")))
+#define __DEFAULT_FN_ATTRS_TRANSPOSE_MOVRS
\
+ __attribute_
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/115625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/115625
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/115151
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -15,81 +15,214 @@
#define __AMXFP8INTRIN_H
#ifdef __x86_64__
-/// Peform the dot product of a BF8 value \a a by a BF8 value \a b accumulating
-/// into a Single Precision (FP32) source/dest \a dst.
+#define __DEFAULT_FN_ATTRS_FP8
phoebewang wrote:
Missing IR test?
https://github.com/llvm/llvm-project/pull/115829
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/115151
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang approved this pull request.
LGTM.
https://github.com/llvm/llvm-project/pull/115829
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang updated
https://github.com/llvm/llvm-project/pull/115660
>From f2fc493149d75f0be13207bc1893a48c7fab84a3 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe"
Date: Sun, 10 Nov 2024 22:37:15 +0800
Subject: [PATCH] [X86][AMX] Support AMX-TRANSPOSE, part 2
Ref.: https://cdrd
@@ -275,6 +276,27 @@ std::pair
ShapeCalculator::getShape(IntrinsicInst *II,
Col = II->getArgOperand(1);
break;
}
+ case Intrinsic::x86_ttdpbf16ps_internal:
+ case Intrinsic::x86_ttdpfp16ps_internal:
+ case Intrinsic::x86_ttcmmimfp16ps_internal:
+ case Intrinsic::
https://github.com/phoebewang approved this pull request.
We don't use it on X86, so LGTM.
https://github.com/llvm/llvm-project/pull/115991
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-comm
@@ -0,0 +1,248 @@
+/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++
-*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Ap
@@ -0,0 +1,248 @@
+/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++
-*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Ap
@@ -34,9 +34,31 @@ class ShapeT {
if (MRI)
deduceImm(MRI);
}
+ // When ShapeT has mult shapes, we only use Shapes (never use Row and Col)
+ // and ImmShapes. Due to the most case is only one shape (just simply use
+ // Shape.Row or Shape.Col), so here we don't me
@@ -34,9 +34,31 @@ class ShapeT {
if (MRI)
deduceImm(MRI);
}
+ // When ShapeT has mult shapes, we only use Shapes (never use Row and Col)
phoebewang wrote:
Done.
https://github.com/llvm/llvm-project/pull/113532
___
@@ -121,12 +137,96 @@ static Instruction
*getFirstNonAllocaInTheEntryBlock(Function &F) {
llvm_unreachable("No terminator in the entry block!");
}
-static std::pair getShape(IntrinsicInst *II, unsigned OpNo) {
+class ShapeCalculator {
+private:
+ TargetMachine *TM = nullpt
@@ -623,6 +623,37 @@ struct X86Operand final : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(Reg));
}
+ bool isTILEPair() const {
+return Kind == Register &&
+ X86MCRegisterClasses[X86::TILERegClassID].contains(getReg());
---
https://github.com/phoebewang edited
https://github.com/llvm/llvm-project/pull/113532
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/phoebewang closed
https://github.com/llvm/llvm-project/pull/113532
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -10,3 +10,7 @@ int test_cmpccxadd32(void *__A, int __B, int __C) {
long long test_cmpccxadd64(void *__A, long long __B, long long __C) {
return _cmpccxadd_epi64(__A, __B, __C, 16); // expected-error {{argument
value 16 is outside the valid range [0, 15]}}
}
+
+long long t
https://github.com/phoebewang approved this pull request.
LGTM, thanks!
https://github.com/llvm/llvm-project/pull/114367
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -919,23 +1017,66 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
return true;
}
+static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
+ bool IsRow) {
+ if (!isAMXIntrinsic(Inst))
+return nullptr;
+
+ auto *II
@@ -0,0 +1,248 @@
+/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++
-*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Ap
601 - 700 of 938 matches
Mail list logo