from:"Evgenii Kudriashov via cfe\-commits"

[clang] [X86][AVX10] Permit AVX512 options/features used together with AVX10 (PR #71318)

2023-11-08 Thread Evgenii Kudriashov via cfe-commits



@@ -131,35 +135,50 @@ bool X86TargetInfo::initFeatureMap(
   continue;
 }
 
-if (Feature.substr(0, 7) == "+avx10.") {
-  HasAVX10 = true;
-  HasAVX512F = true;
-  if (Feature.substr(Feature.size() - 3, 3) == "512") {
-HasEVEX512 = true;
-  } else if (Feature.substr(7, 2) == "1-") {
-HasEVEX512 = false;
+if (Feature.substr(1, 6) == "avx10.") {
+  if (Feature[0] == '+') {
+HasAVX10 = true;
+if (Feature.substr(Feature.size() - 3, 3) == "512")
+  HasAVX10_512 = true;
+LastAVX10 = Feature;
+  } else if (HasAVX10 && Feature == "-avx10.1-256") {
+HasAVX10 = false;
+HasAVX10_512 = false;
+  } else if (HasAVX10_512 && Feature == "-avx10.1-512") {
+HasAVX10_512 = false;
   }
+  // Postpone AVX10 features handling after AVX512 settled.
+  UpdatedAVX10FeaturesVec.push_back(Feature);
+  continue;
 } else if (!HasAVX512F && Feature.substr(0, 7) == "+avx512") {
   HasAVX512F = true;
+  LastAVX512 = Feature;
 } else if (HasAVX512F && Feature == "-avx512f") {
   HasAVX512F = false;
-} else if (HasAVX10 && Feature == "-avx10.1-256") {
-  HasAVX10 = false;
-  HasAVX512F = false;
-} else if (!HasEVEX512 && Feature == "+evex512") {
+} else if (HasEVEX512 != true && Feature == "+evex512") {
   HasEVEX512 = true;
-} else if (HasEVEX512 && Feature == "-avx10.1-512") {
-  HasEVEX512 = false;
-} else if (HasEVEX512 && Feature == "-evex512") {
+  continue;
+} else if (HasEVEX512 != false && Feature == "-evex512") {
   HasEVEX512 = false;
+  continue;
 }
 
 UpdatedFeaturesVec.push_back(Feature);
   }
-  if (HasAVX512F && HasEVEX512)
-UpdatedFeaturesVec.push_back("+evex512");
-  else if (HasAVX10)
-UpdatedFeaturesVec.push_back("-evex512");
+  llvm::append_range(UpdatedFeaturesVec, UpdatedAVX10FeaturesVec);

e-kud wrote:

Nope, there is a `continue` in handling `avx10*` features

https://github.com/llvm/llvm-project/pull/71318
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Permit AVX512 options/features used together with AVX10 (PR #71318)

2023-11-08 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

I'm a little bit confused. What's the expected behavior of
`+avx10.1-512 -avx10.1-256` from the codegen point of view? Should we generate
only the instructions in the difference of the sets? Or do we consider
`avx10.1-256` a base of `avx10.1-512`, so that if it is disabled
`avx10.1-512` can't be enabled?

https://github.com/llvm/llvm-project/pull/71318
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Permit AVX512 options/features used together with AVX10 (PR #71318)

2023-11-09 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/71318
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Fix a bug when using -march with no-evex512 attribute (PR #72126)

2023-11-13 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

Interesting. LGTM

https://github.com/llvm/llvm-project/pull/72126
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10] Allow 64-bit mask register used without EVEX512 (PR #75571)

2023-12-15 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/75571
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang-tools-extra] [llvm] [X86][GlobalISel] Remove G_OR/G_AND/G_XOR test duplication (NFC) (PR #79088)

2024-01-25 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/79088

>From 8ba23b70c07f21be03102b2975046ca9a5afb90b Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Mon, 22 Jan 2024 18:00:19 -0800
Subject: [PATCH] [X86][GlobalISel] Remove G_OR/G_AND/G_XOR test duplication
 (NFC)

---
 .../test/CodeGen/X86/GlobalISel/and-scalar.ll | 60 ---
 llvm/test/CodeGen/X86/GlobalISel/or-scalar.ll | 60 ---
 .../test/CodeGen/X86/GlobalISel/xor-scalar.ll | 60 ---
 llvm/test/CodeGen/X86/isel-and.ll | 44 ++
 llvm/test/CodeGen/X86/isel-or.ll  | 45 ++
 llvm/test/CodeGen/X86/isel-xor.ll | 45 ++
 6 files changed, 134 insertions(+), 180 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/and-scalar.ll
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/or-scalar.ll
 delete mode 100644 llvm/test/CodeGen/X86/GlobalISel/xor-scalar.ll

diff --git a/llvm/test/CodeGen/X86/GlobalISel/and-scalar.ll 
b/llvm/test/CodeGen/X86/GlobalISel/and-scalar.ll
deleted file mode 100644
index 88a7563612e231d..000
--- a/llvm/test/CodeGen/X86/GlobalISel/and-scalar.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s 
-o - | FileCheck %s --check-prefix=ALL
-
-define i32 @test_and_i1(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_and_i1:
-; ALL:   # %bb.0:
-; ALL-NEXT:cmpl %esi, %edi
-; ALL-NEXT:sete %al
-; ALL-NEXT:andb %al, %al
-; ALL-NEXT:movzbl %al, %eax
-; ALL-NEXT:andl $1, %eax
-; ALL-NEXT:retq
-  %c = icmp eq i32 %arg1, %arg2
-  %x = and i1 %c , %c
-  %ret = zext i1 %x to i32
-  ret i32 %ret
-}
-
-define i8 @test_and_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_and_i8:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:andb %dil, %al
-; ALL-NEXT:# kill: def $al killed $al killed $eax
-; ALL-NEXT:retq
-  %ret = and i8 %arg1, %arg2
-  ret i8 %ret
-}
-
-define i16 @test_and_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_and_i16:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:andw %di, %ax
-; ALL-NEXT:# kill: def $ax killed $ax killed $eax
-; ALL-NEXT:retq
-  %ret = and i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_and_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_and_i32:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:andl %edi, %eax
-; ALL-NEXT:retq
-  %ret = and i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_and_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_and_i64:
-; ALL:   # %bb.0:
-; ALL-NEXT:movq %rsi, %rax
-; ALL-NEXT:andq %rdi, %rax
-; ALL-NEXT:retq
-  %ret = and i64 %arg1, %arg2
-  ret i64 %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/GlobalISel/or-scalar.ll 
b/llvm/test/CodeGen/X86/GlobalISel/or-scalar.ll
deleted file mode 100644
index 1edb72ca9b6cfcd..000
--- a/llvm/test/CodeGen/X86/GlobalISel/or-scalar.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s 
-o - | FileCheck %s --check-prefix=ALL
-
-define i32 @test_or_i1(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_or_i1:
-; ALL:   # %bb.0:
-; ALL-NEXT:cmpl %esi, %edi
-; ALL-NEXT:sete %al
-; ALL-NEXT:orb %al, %al
-; ALL-NEXT:movzbl %al, %eax
-; ALL-NEXT:andl $1, %eax
-; ALL-NEXT:retq
-  %c = icmp eq i32 %arg1, %arg2
-  %x = or i1 %c , %c
-  %ret = zext i1 %x to i32
-  ret i32 %ret
-}
-
-define i8 @test_or_i8(i8 %arg1, i8 %arg2) {
-; ALL-LABEL: test_or_i8:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:orb %dil, %al
-; ALL-NEXT:# kill: def $al killed $al killed $eax
-; ALL-NEXT:retq
-  %ret = or i8 %arg1, %arg2
-  ret i8 %ret
-}
-
-define i16 @test_or_i16(i16 %arg1, i16 %arg2) {
-; ALL-LABEL: test_or_i16:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:orw %di, %ax
-; ALL-NEXT:# kill: def $ax killed $ax killed $eax
-; ALL-NEXT:retq
-  %ret = or i16 %arg1, %arg2
-  ret i16 %ret
-}
-
-define i32 @test_or_i32(i32 %arg1, i32 %arg2) {
-; ALL-LABEL: test_or_i32:
-; ALL:   # %bb.0:
-; ALL-NEXT:movl %esi, %eax
-; ALL-NEXT:orl %edi, %eax
-; ALL-NEXT:retq
-  %ret = or i32 %arg1, %arg2
-  ret i32 %ret
-}
-
-define i64 @test_or_i64(i64 %arg1, i64 %arg2) {
-; ALL-LABEL: test_or_i64:
-; ALL:   # %bb.0:
-; ALL-NEXT:movq %rsi, %rax
-; ALL-NEXT:orq %rdi, %rax
-; ALL-NEXT:retq
-  %ret = or i64 %arg1, %arg2
-  ret i64 %ret
-}
-
diff --git a/llvm/test/CodeGen/X86/GlobalISel/xor-scalar.ll 
b/llvm/test/CodeGen/X86/GlobalISel/xor-scalar.ll
deleted file mode 100644
index 5a256d5875fcb6e..000
--- a/llvm/test/CodeGen/X86/GlobalISel/xor-scalar.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertio

[clang-tools-extra] [llvm] [clang] [X86][GlobalISel] Remove G_OR/G_AND/G_XOR test duplication (NFC) (PR #79088)

2024-01-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud closed https://github.com/llvm/llvm-project/pull/79088
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [mlir] [compiler-rt] [libunwind] [flang] [lldb] [llvm] [clang-tools-extra] [openmp] [lld] [BranchFolding] Fix missing predecessors of landing-pad (PR #77608)

2024-01-26 Thread Evgenii Kudriashov via cfe-commits



@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc %s

e-kud wrote:

It seems we still have this file on `avx512-intel64` worker:
https://lab.llvm.org/buildbot/#/builders/258/builds/12970
https://lab.llvm.org/buildbot/#/builders/258/builds/12971

https://github.com/llvm/llvm-project/pull/77608
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[lld] [llvm] [lldb] [libunwind] [compiler-rt] [clang] [flang] [clang-tools-extra] [mlir] [openmp] [BranchFolding] Fix missing predecessors of landing-pad (PR #77608)

2024-01-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77608
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[lld] [llvm] [lldb] [libunwind] [compiler-rt] [clang] [flang] [clang-tools-extra] [mlir] [openmp] [BranchFolding] Fix missing predecessors of landing-pad (PR #77608)

2024-01-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77608
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[lld] [llvm] [lldb] [libunwind] [compiler-rt] [clang] [flang] [clang-tools-extra] [mlir] [openmp] [BranchFolding] Fix missing predecessors of landing-pad (PR #77608)

2024-01-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77608
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-12 Thread Evgenii Kudriashov via cfe-commits



@@ -3963,6 +3963,60 @@ implicitly included in later levels.
 - ``-march=x86-64-v3``: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, 
LZCNT, MOVBE, XSAVE
 - ``-march=x86-64-v4``: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
 
+`Intel AVX10 ISA `_ is
+a major new vector ISA incorporating the modern vectorization aspects of
+Intel AVX-512. This ISA will be supported on all future Intel processor.

e-kud wrote:

Processor**s**?

https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-12 Thread Evgenii Kudriashov via cfe-commits



@@ -3963,6 +3963,60 @@ implicitly included in later levels.
 - ``-march=x86-64-v3``: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, 
LZCNT, MOVBE, XSAVE
 - ``-march=x86-64-v4``: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
 
+`Intel AVX10 ISA `_ is
+a major new vector ISA incorporating the modern vectorization aspects of
+Intel AVX-512. This ISA will be supported on all future Intel processor.
+Users are supposed to use the new options ``-mavx10.N`` and ``-mavx10.N-512``
+on these processors and should not use traditional AVX512 options anymore.
+
+The ``N`` in ``-mavx10.N`` represents a continuous integer number starting
+from ``1``. ``-mavx10.N`` is an alias of ``-mavx10.N-256``, which means to
+enable all instructions within AVX10 version N at a maximum vector length of
+256 bits. ``-mavx10.N-512`` enables all instructions at a maximum vector
+length of 512 bits, which is a superset of instructions ``-mavx10.N`` enabled.
+
+Current binaries built with AVX512 features can run on Intel AVX10/512 capable
+processor without re-compile, but cannot run on AVX10/256 capable processor.
+Users need to re-compile their code with ``-mavx10.N``, and maybe update some
+code that calling to 512-bit X86 specific intrinsics and passing or returning
+512-bit vector types in function call, if they want to run on AVX10/256 capable
+processor. Binaries built with ``-mavx10.N`` can run on both AVX10/256 and
+AVX10/512 capable processor.
+
+Users can add a ``-mno-evex512`` in the command line with AVX512 options if
+they want to run the binary on both legacy AVX512 and new AVX10/256 capable
+processors. The option has the same constraints as ``-mavx10.N``, i.e.,
+cannot call to 512-bit X86 specific intrinsics and pass or return 512-bit 
vector
+types in function call.
+
+Users should avoid using AVX512 features in function target attributes when
+developing code for AVX10. If they have to do so, they need to add an explicit
+``evex512`` or ``no-evex512`` together with AVX512 features for 512-bit or
+non-512-bit functions respectively to avoid unexpected code generation. Both
+command line option and target attribute of EVEX512 feature can only be used
+with AVX512. They don't affect vector size of AVX10.
+
+User should not mix the use AVX10 and AVX512 options together at any time,
+because the option combinations are conflicting sometimes. For example, a
+combination of ``-mavx512f -mavx10.1-256`` doesn't show a clear intention to
+compiler, since instructions in AVX512F and AVX10.1/256 intersect but do not
+overlap. In this case, compiler will emit warning for it, but the behavior
+is determined. It will generate the same code as option ``-mavx10.1-512``.
+A similar case is ``-mavx512f -mavx10.2-256``, which equals to
+``-mavx10.1-512 -mavx10.2-256``, because ``avx10.2-256`` implies 
``avx10.1-256``
+and ``-mavx512f -mavx10.1-256`` equals to ``-mavx10.1-512``.
+
+There are some new macros introduced with AVX10 support. ``-mavx10.1-256`` will
+enable ``__AVX10_1__`` and ``__EVEX256__``, while ``-mavx10.1-512`` enables
+``__AVX10_1__``, ``__EVEX256__``, ``__EVEX512__``  and ``__AVX10_1_512__``.
+Besides, both ``-mavx10.1-256`` and ``-mavx10.1-512`` will enable all AVX512
+feature specific macros. A AVX512 feature will enable both ``__EVEX256__``,
+``__EVEX512__`` and its own macro. So ``__EVEX512__`` can be used to guard code
+that can run on both legacy AVX512 and AVX10/512 capable processors but cannot
+run on AVX10/256, while a AVX512 macro like ``__AVX512F__`` cannot tell the
+difference among the three options. Users need to check additional macros
+``__AVX10_1__`` and ``__EVEX512__`` if they want to make distinction.

e-kud wrote:

I don't know whether it is worth mentioning or not, but if a user wants to
compile code with both old (before avx10 was introduced) and new (after the
introduction) compilers, then the user has to use something like this.

```if !defined(__AVX10_1__) && defined(__AVX512F__) || defined(__AVX512F__) && 
defined(__EVEX512__)```

https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-12 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-12 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-13 Thread Evgenii Kudriashov via cfe-commits



@@ -3963,6 +3963,60 @@ implicitly included in later levels.
 - ``-march=x86-64-v3``: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, 
LZCNT, MOVBE, XSAVE
 - ``-march=x86-64-v4``: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
 
+`Intel AVX10 ISA `_ is
+a major new vector ISA incorporating the modern vectorization aspects of
+Intel AVX-512. This ISA will be supported on all future Intel processor.
+Users are supposed to use the new options ``-mavx10.N`` and ``-mavx10.N-512``
+on these processors and should not use traditional AVX512 options anymore.
+
+The ``N`` in ``-mavx10.N`` represents a continuous integer number starting
+from ``1``. ``-mavx10.N`` is an alias of ``-mavx10.N-256``, which means to
+enable all instructions within AVX10 version N at a maximum vector length of
+256 bits. ``-mavx10.N-512`` enables all instructions at a maximum vector
+length of 512 bits, which is a superset of instructions ``-mavx10.N`` enabled.
+
+Current binaries built with AVX512 features can run on Intel AVX10/512 capable
+processor without re-compile, but cannot run on AVX10/256 capable processor.
+Users need to re-compile their code with ``-mavx10.N``, and maybe update some
+code that calling to 512-bit X86 specific intrinsics and passing or returning
+512-bit vector types in function call, if they want to run on AVX10/256 capable
+processor. Binaries built with ``-mavx10.N`` can run on both AVX10/256 and
+AVX10/512 capable processor.
+
+Users can add a ``-mno-evex512`` in the command line with AVX512 options if
+they want to run the binary on both legacy AVX512 and new AVX10/256 capable
+processors. The option has the same constraints as ``-mavx10.N``, i.e.,
+cannot call to 512-bit X86 specific intrinsics and pass or return 512-bit 
vector
+types in function call.
+
+Users should avoid using AVX512 features in function target attributes when
+developing code for AVX10. If they have to do so, they need to add an explicit
+``evex512`` or ``no-evex512`` together with AVX512 features for 512-bit or
+non-512-bit functions respectively to avoid unexpected code generation. Both
+command line option and target attribute of EVEX512 feature can only be used
+with AVX512. They don't affect vector size of AVX10.
+
+User should not mix the use AVX10 and AVX512 options together at any time,
+because the option combinations are conflicting sometimes. For example, a
+combination of ``-mavx512f -mavx10.1-256`` doesn't show a clear intention to
+compiler, since instructions in AVX512F and AVX10.1/256 intersect but do not
+overlap. In this case, compiler will emit warning for it, but the behavior
+is determined. It will generate the same code as option ``-mavx10.1-512``.
+A similar case is ``-mavx512f -mavx10.2-256``, which equals to
+``-mavx10.1-512 -mavx10.2-256``, because ``avx10.2-256`` implies 
``avx10.1-256``
+and ``-mavx512f -mavx10.1-256`` equals to ``-mavx10.1-512``.
+
+There are some new macros introduced with AVX10 support. ``-mavx10.1-256`` will
+enable ``__AVX10_1__`` and ``__EVEX256__``, while ``-mavx10.1-512`` enables
+``__AVX10_1__``, ``__EVEX256__``, ``__EVEX512__``  and ``__AVX10_1_512__``.
+Besides, both ``-mavx10.1-256`` and ``-mavx10.1-512`` will enable all AVX512
+feature specific macros. A AVX512 feature will enable both ``__EVEX256__``,
+``__EVEX512__`` and its own macro. So ``__EVEX512__`` can be used to guard code
+that can run on both legacy AVX512 and AVX10/512 capable processors but cannot
+run on AVX10/256, while a AVX512 macro like ``__AVX512F__`` cannot tell the
+difference among the three options. Users need to check additional macros
+``__AVX10_1__`` and ``__EVEX512__`` if they want to make distinction.

e-kud wrote:

My idea was about the scenario where you've migrated your sources to avx10
(e.g. using EVEX512 instead of AVX512F) but still have CI with different
compilers that do not support it. I thought that it may be useful to provide
some guidance. But I'm OK to leave it up to users whether they decide to
specify versions or avx10 support availability.

https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-13 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-13 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AVX10][Doc] Add documentation about AVX10 options and their attentions (PR #77925)

2024-01-13 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/77925
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[libunwind] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-18 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-18 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-18 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-17 Thread Evgenii Kudriashov via cfe-commits



@@ -130,17 +131,35 @@ bool X86TargetInfo::initFeatureMap(
   continue;
 }
 
-if (!HasAVX512F && Feature.substr(0, 7) == "+avx512")
+if (Feature.substr(0, 7) == "+avx10.") {
+  HasAVX10 = true;
   HasAVX512F = true;
-if (HasAVX512F && Feature == "-avx512f")
+  if (Feature.substr(Feature.size() - 3, 3) == "512") {
+HasEVEX512 = true;
+  } else if (Feature.substr(7, 2) == "1-") {

e-kud wrote:

I'm not sure whether a comment is needed here or not, but my first question
was: why don't we reset `HasEVEX512` for `avx10.2-256`? It only makes sense
after realizing that `avx10.2-256` implies `avx10.1-256`.

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-17 Thread Evgenii Kudriashov via cfe-commits



@@ -130,17 +131,35 @@ bool X86TargetInfo::initFeatureMap(
   continue;
 }
 
-if (!HasAVX512F && Feature.substr(0, 7) == "+avx512")
+if (Feature.substr(0, 7) == "+avx10.") {
+  HasAVX10 = true;
   HasAVX512F = true;
-if (HasAVX512F && Feature == "-avx512f")
+  if (Feature.substr(Feature.size() - 3, 3) == "512") {
+HasEVEX512 = true;
+  } else if (Feature.substr(7, 2) == "1-") {

e-kud wrote:

I'm not sure whether a comment is needed here or not, but my first question
was: why don't we reset `HasEVEX512` for `avx10.2-256`? It only makes sense
after realizing that `avx10.2-256` implies `avx10.1-256`.

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][RFC] Support AVX10 options (PR #67278)

2023-10-17 Thread Evgenii Kudriashov via cfe-commits



@@ -130,17 +131,35 @@ bool X86TargetInfo::initFeatureMap(
   continue;
 }
 
-if (!HasAVX512F && Feature.substr(0, 7) == "+avx512")
+if (Feature.substr(0, 7) == "+avx10.") {
+  HasAVX10 = true;
   HasAVX512F = true;
-if (HasAVX512F && Feature == "-avx512f")
+  if (Feature.substr(Feature.size() - 3, 3) == "512") {
+HasEVEX512 = true;
+  } else if (Feature.substr(7, 2) == "1-") {
+HasEVEX512 = false;
+  }
+} else if (!HasAVX512F && Feature.substr(0, 7) == "+avx512") {
+  HasAVX512F = true;
+} else if (HasAVX512F && Feature == "-avx512f") {
+  HasAVX512F = false;
+} else if (HasAVX10 && Feature == "-avx10.1-256") {
+  HasAVX10 = false;
   HasAVX512F = false;
-if (HasEVEX512 && Feature == "-evex512")
+} else if (!HasEVEX512 && Feature == "+evex512") {
+  HasEVEX512 = true;
+} else if (HasEVEX512 && Feature == "-avx10.1-512") {

e-kud wrote:

`+avx10.1-512,-avx10.1-512` effectively means `+avx10.1-256`?

Generally, when I see such a flag dangling, I tend to think there is likely
to be a bug. Do we have some reasoning that it is correct? Or maybe it's just
my internal bias.

https://github.com/llvm/llvm-project/pull/67278
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [llvm] Reland "[X86][AVX10.2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions (#101452)" (PR #101616)

2024-08-02 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/101616
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [llvm] Reland "[X86][AVX10.2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions (#101452)" (PR #101616)

2024-08-02 Thread Evgenii Kudriashov via cfe-commits



@@ -978,8 +978,20 @@ static void getAvailableFeatures(unsigned ECX, unsigned 
EDX, unsigned MaxLeaf,
 
   bool HasLeaf24 =
   MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
-  if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
-setFeature(FEATURE_AVX10_1_512);
+  if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) {
+bool Has512Len = (EBX >> 18) & 1;
+int AVX10Ver = EBX & 0xff;
+if (AVX10Ver >= 2) {
+  setFeature(FEATURE_AVX10_2_256);
+  if (Has512Len)
+setFeature(FEATURE_AVX10_2_512);
+}
+if (AVX10Ver >= 1) {

e-kud wrote:

Is the reason for relanding that compiler-rt parses the literal cpuid without
any implications?

https://github.com/llvm/llvm-project/pull/101616
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [llvm] Reland "[X86][AVX10.2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions (#101452)" (PR #101616)

2024-08-02 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/101616
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [compiler-rt] [llvm] Reland "[X86][AVX10.2] Support AVX10.2 option and VMPSADBW/VADDP[D,H,S] new instructions (#101452)" (PR #101616)

2024-08-02 Thread Evgenii Kudriashov via cfe-commits



@@ -223,6 +227,10 @@ InstructionContext RecognizableInstr::insnContext() const {
   insnContext = EVEX_KB_U(IC_EVEX_XD);
 else if (OpPrefix == X86Local::PS)
   insnContext = EVEX_KB_U(IC_EVEX);
+else {
+  errs() << "Instruction does not use a prefix: " << Name << "\n";
+  llvm_unreachable("Invalid prefix");

e-kud wrote:

Oh, I asked myself some time ago whether there is a difference between
`llvm_unreachable` and `errs()`. The difference shows up in production builds:
the message from `llvm_unreachable` is not shown there.

https://github.com/llvm/llvm-project/pull/101616
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-21 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud created https://github.com/llvm/llvm-project/pull/96363

Introduce more detailed diagnostics for the constraints. Also provide an
opportunity for backends to provide detailed diagnostics for target specific
constraints based on enabled features. We provide features as a pointer
intentionally because they are not available in some of the existing uses. So
backends need to consider whether features are available or not.

>From 4f8504878da33925609d52912e8d0e1f64c41066 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Fri, 21 Jun 2024 14:00:58 -0700
Subject: [PATCH] [clang] Improve diagnostics for constraints of inline asm

Introduce more detailed diagnostics for the constrains. Also provide an
opportunity for backends to provide detailed diagnostics for target
specific constraints based on enabled features.
---
 .../clang/Basic/DiagnosticCommonKinds.td  | 33 +
 .../clang/Basic/DiagnosticSemaKinds.td|  4 --
 clang/include/clang/Basic/TargetInfo.h| 19 +++--
 clang/lib/Basic/TargetInfo.cpp| 63 +++-
 clang/lib/Basic/Targets/AArch64.cpp   |  6 +-
 clang/lib/Basic/Targets/AArch64.h |  4 +-
 clang/lib/Basic/Targets/AMDGPU.h  |  7 +-
 clang/lib/Basic/Targets/ARC.h |  4 +-
 clang/lib/Basic/Targets/ARM.cpp   |  6 +-
 clang/lib/Basic/Targets/ARM.h |  4 +-
 clang/lib/Basic/Targets/AVR.h |  4 +-
 clang/lib/Basic/Targets/BPF.h |  4 +-
 clang/lib/Basic/Targets/CSKY.cpp  |  6 +-
 clang/lib/Basic/Targets/CSKY.h|  4 +-
 clang/lib/Basic/Targets/DirectX.h |  4 +-
 clang/lib/Basic/Targets/Hexagon.h |  4 +-
 clang/lib/Basic/Targets/Lanai.h   |  4 +-
 clang/lib/Basic/Targets/Le64.h|  4 +-
 clang/lib/Basic/Targets/LoongArch.cpp |  3 +-
 clang/lib/Basic/Targets/LoongArch.h   |  4 +-
 clang/lib/Basic/Targets/M68k.cpp  | 30 
 clang/lib/Basic/Targets/M68k.h|  4 +-
 clang/lib/Basic/Targets/MSP430.h  |  4 +-
 clang/lib/Basic/Targets/Mips.h|  4 +-
 clang/lib/Basic/Targets/NVPTX.h   |  4 +-
 clang/lib/Basic/Targets/PNaCl.h   |  4 +-
 clang/lib/Basic/Targets/PPC.h |  4 +-
 clang/lib/Basic/Targets/RISCV.cpp |  6 +-
 clang/lib/Basic/Targets/RISCV.h   |  4 +-
 clang/lib/Basic/Targets/SPIR.cpp  |  5 +-
 clang/lib/Basic/Targets/SPIR.h|  8 ++-
 clang/lib/Basic/Targets/Sparc.h   |  6 +-
 clang/lib/Basic/Targets/SystemZ.cpp   |  6 +-
 clang/lib/Basic/Targets/SystemZ.h |  4 +-
 clang/lib/Basic/Targets/TCE.h |  4 +-
 clang/lib/Basic/Targets/VE.h  |  4 +-
 clang/lib/Basic/Targets/WebAssembly.h |  4 +-
 clang/lib/Basic/Targets/X86.cpp   |  6 +-
 clang/lib/Basic/Targets/X86.h |  4 +-
 clang/lib/Basic/Targets/XCore.h   |  4 +-
 clang/lib/CodeGen/CGStmt.cpp  | 21 --
 clang/lib/Sema/SemaStmtAsm.cpp| 18 ++---
 clang/test/Sema/asm.c | 72 ++-
 43 files changed, 287 insertions(+), 134 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index de758cbe679dc..d4b0862337165 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -309,6 +309,39 @@ def err_asm_invalid_type : Error<
 def err_ms_asm_bitfield_unsupported : Error<
   "an inline asm block cannot have an operand which is a bit-field">;
 
+def asm_invalid_constraint_generic : TextSubstitution<
+  "invalid %select{input|output}0 constraint '%1' in asm">;
+def err_asm_invalid_constraint : Error<
+  "%sub{asm_invalid_constraint_generic}0,1">;
+def err_asm_invalid_constraint_start : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: output constraint must start with"
+  " '=' or '+'">;
+def err_asm_invalid_constraint_rw_clobber : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: early clobber with a read-write"
+  " constraint must be a register">;
+def err_asm_invalid_constraint_mem_or_reg : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: constraint must allow either"
+  " memory or register operands">;
+def err_asm_invalid_constraint_missing_bracket : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: missing ']'">;
+def err_asm_invalid_constraint_wrong_symbol : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: cannot find an output constraint"
+  " with the specified name">;
+def err_asm_invalid_constraint_empty : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: empty constraint has been"
+  " provided">;
+def err_asm_invalid_constraint_oob : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: the index is out of bounds">;
+de

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-21 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

Initially I implemented the target errors through std::string, then changed 
to diag::kind after reading the InternalsManual. I'm not sure which is better. The 
drawback of returning diagnostics by reference is that we can't customize 
them; only fixed messages are possible. Maybe this is not a big problem because 
we don't have constantly changing constraints. 

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-24 Thread Evgenii Kudriashov via cfe-commits

e-kud wrote:

> It's really unfortunate to have to add all this asm handling to clang. Can't 
> it rely on backend diagnostic remarks for this?

I've tried to do a similar thing in the backend: 
https://reviews.llvm.org/D152332. The problem I see is that `llc`'s error 
handler ignores errors and continues compilation until something further 
reports a fatal error. In other words, we have to intentionally return something 
wrong to continue compilation. On the other hand, we can prepare all 
callers to have an error somewhere inside, but that will require a lot of 
refactoring because there is no common practice of emitting target-specific 
errors. `report_fatal_error` is much more commonly used.

```
$ grep -R emitError lib/Target/X86/*
lib/Target/X86/X86FloatingPoint.cpp:  MI.emitError("fixed input regs must 
be last on the x87 stack");
lib/Target/X86/X86FloatingPoint.cpp:  MI.emitError("output regs must be 
last on the x87 stack");
lib/Target/X86/X86FloatingPoint.cpp:  MI.emitError("clobbers must be last 
on the x87 stack");
lib/Target/X86/X86FloatingPoint.cpp:  MI.emitError("implicitly popped regs 
must be last on the x87 stack");
lib/Target/X86/X86PreTileConfig.cpp:static void emitErrorMsg(MachineFunction 
&MF) {
lib/Target/X86/X86PreTileConfig.cpp:  Context.emitError(
lib/Target/X86/X86PreTileConfig.cpp:  emitErrorMsg(MF);
lib/Target/X86/X86PreTileConfig.cpp:  emitErrorMsg(MF);
$ grep -R emitError lib/Target/AArch64/*
lib/Target/AArch64/AArch64AsmPrinter.cpp:BaseGV->getContext().emitError(
$ grep -R emitError lib/Target/RISCV/*
$
```
```
$ grep -R emitError lib/Target/* | wc -l
47
$ grep -R report_fatal_error lib/Target/* | wc -l
675
```
I'm not sure whether it is much better. 

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-24 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

So, I think this PR still makes sense but without target changes, right?

I've taken a look at the backend, and constraints are checked in 
`getRegForInlineAsmConstraint`. We either need to return an error message or 
pass `Context` into it. The former is preferable because a call to 
`getRegForInlineAsmConstraint` doesn't always mean an error is required. 
However, in both cases we need to touch all the targets...

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-26 Thread Evgenii Kudriashov via cfe-commits



@@ -309,6 +309,39 @@ def err_asm_invalid_type : Error<
 def err_ms_asm_bitfield_unsupported : Error<
   "an inline asm block cannot have an operand which is a bit-field">;
 
+def asm_invalid_constraint_generic : TextSubstitution<
+  "invalid %select{input|output}0 constraint '%1' in asm">;
+def err_asm_invalid_constraint : Error<
+  "%sub{asm_invalid_constraint_generic}0,1">;
+def err_asm_invalid_constraint_start : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: output constraint must start with"
+  " '=' or '+'">;
+def err_asm_invalid_constraint_rw_clobber : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: early clobber with a read-write"
+  " constraint must be a register">;
+def err_asm_invalid_constraint_mem_or_reg : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: constraint must allow either"
+  " memory or register operands">;
+def err_asm_invalid_constraint_missing_bracket : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: missing ']'">;
+def err_asm_invalid_constraint_wrong_symbol : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: cannot find an output constraint"
+  " with the specified name">;

e-kud wrote:

Yes, this is similar to digits
> Input constraints can also be digits (for example, "0"). This indicates that 
> the specified input must be in the same place as the output constraint at the 
> (zero-based) index in the output constraint list. When using asmSymbolicName 
> syntax for the output operands, you may use these names (enclosed in brackets 
> []) instead of digits.

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/96363

>From 4f8504878da33925609d52912e8d0e1f64c41066 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Fri, 21 Jun 2024 14:00:58 -0700
Subject: [PATCH 1/2] [clang] Improve diagnostics for constraints of inline asm

Introduce more detailed diagnostics for the constraints. Also provide an
opportunity for backends to provide detailed diagnostics for target
specific constraints based on enabled features.
---
 .../clang/Basic/DiagnosticCommonKinds.td  | 33 +
 .../clang/Basic/DiagnosticSemaKinds.td|  4 --
 clang/include/clang/Basic/TargetInfo.h| 19 +++--
 clang/lib/Basic/TargetInfo.cpp| 63 +++-
 clang/lib/Basic/Targets/AArch64.cpp   |  6 +-
 clang/lib/Basic/Targets/AArch64.h |  4 +-
 clang/lib/Basic/Targets/AMDGPU.h  |  7 +-
 clang/lib/Basic/Targets/ARC.h |  4 +-
 clang/lib/Basic/Targets/ARM.cpp   |  6 +-
 clang/lib/Basic/Targets/ARM.h |  4 +-
 clang/lib/Basic/Targets/AVR.h |  4 +-
 clang/lib/Basic/Targets/BPF.h |  4 +-
 clang/lib/Basic/Targets/CSKY.cpp  |  6 +-
 clang/lib/Basic/Targets/CSKY.h|  4 +-
 clang/lib/Basic/Targets/DirectX.h |  4 +-
 clang/lib/Basic/Targets/Hexagon.h |  4 +-
 clang/lib/Basic/Targets/Lanai.h   |  4 +-
 clang/lib/Basic/Targets/Le64.h|  4 +-
 clang/lib/Basic/Targets/LoongArch.cpp |  3 +-
 clang/lib/Basic/Targets/LoongArch.h   |  4 +-
 clang/lib/Basic/Targets/M68k.cpp  | 30 
 clang/lib/Basic/Targets/M68k.h|  4 +-
 clang/lib/Basic/Targets/MSP430.h  |  4 +-
 clang/lib/Basic/Targets/Mips.h|  4 +-
 clang/lib/Basic/Targets/NVPTX.h   |  4 +-
 clang/lib/Basic/Targets/PNaCl.h   |  4 +-
 clang/lib/Basic/Targets/PPC.h |  4 +-
 clang/lib/Basic/Targets/RISCV.cpp |  6 +-
 clang/lib/Basic/Targets/RISCV.h   |  4 +-
 clang/lib/Basic/Targets/SPIR.cpp  |  5 +-
 clang/lib/Basic/Targets/SPIR.h|  8 ++-
 clang/lib/Basic/Targets/Sparc.h   |  6 +-
 clang/lib/Basic/Targets/SystemZ.cpp   |  6 +-
 clang/lib/Basic/Targets/SystemZ.h |  4 +-
 clang/lib/Basic/Targets/TCE.h |  4 +-
 clang/lib/Basic/Targets/VE.h  |  4 +-
 clang/lib/Basic/Targets/WebAssembly.h |  4 +-
 clang/lib/Basic/Targets/X86.cpp   |  6 +-
 clang/lib/Basic/Targets/X86.h |  4 +-
 clang/lib/Basic/Targets/XCore.h   |  4 +-
 clang/lib/CodeGen/CGStmt.cpp  | 21 --
 clang/lib/Sema/SemaStmtAsm.cpp| 18 ++---
 clang/test/Sema/asm.c | 72 ++-
 43 files changed, 287 insertions(+), 134 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index de758cbe679dc..d4b0862337165 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -309,6 +309,39 @@ def err_asm_invalid_type : Error<
 def err_ms_asm_bitfield_unsupported : Error<
   "an inline asm block cannot have an operand which is a bit-field">;
 
+def asm_invalid_constraint_generic : TextSubstitution<
+  "invalid %select{input|output}0 constraint '%1' in asm">;
+def err_asm_invalid_constraint : Error<
+  "%sub{asm_invalid_constraint_generic}0,1">;
+def err_asm_invalid_constraint_start : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: output constraint must start with"
+  " '=' or '+'">;
+def err_asm_invalid_constraint_rw_clobber : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: early clobber with a read-write"
+  " constraint must be a register">;
+def err_asm_invalid_constraint_mem_or_reg : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: constraint must allow either"
+  " memory or register operands">;
+def err_asm_invalid_constraint_missing_bracket : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: missing ']'">;
+def err_asm_invalid_constraint_wrong_symbol : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: cannot find an output constraint"
+  " with the specified name">;
+def err_asm_invalid_constraint_empty : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: empty constraint has been"
+  " provided">;
+def err_asm_invalid_constraint_oob : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: the index is out of bounds">;
+def err_asm_invalid_constraint_missing : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: references to a non-existing 
output"
+  " constraint">;
+def err_asm_invalid_constraint_wrongly_tied : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: tied constraint must be tied to"
+  " the same operand referenced to by the number">;
+def err_asm_invalid_constra

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud updated https://github.com/llvm/llvm-project/pull/96363

>From 4f8504878da33925609d52912e8d0e1f64c41066 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Fri, 21 Jun 2024 14:00:58 -0700
Subject: [PATCH 1/3] [clang] Improve diagnostics for constraints of inline asm

Introduce more detailed diagnostics for the constraints. Also provide an
opportunity for backends to provide detailed diagnostics for target
specific constraints based on enabled features.
---
 .../clang/Basic/DiagnosticCommonKinds.td  | 33 +
 .../clang/Basic/DiagnosticSemaKinds.td|  4 --
 clang/include/clang/Basic/TargetInfo.h| 19 +++--
 clang/lib/Basic/TargetInfo.cpp| 63 +++-
 clang/lib/Basic/Targets/AArch64.cpp   |  6 +-
 clang/lib/Basic/Targets/AArch64.h |  4 +-
 clang/lib/Basic/Targets/AMDGPU.h  |  7 +-
 clang/lib/Basic/Targets/ARC.h |  4 +-
 clang/lib/Basic/Targets/ARM.cpp   |  6 +-
 clang/lib/Basic/Targets/ARM.h |  4 +-
 clang/lib/Basic/Targets/AVR.h |  4 +-
 clang/lib/Basic/Targets/BPF.h |  4 +-
 clang/lib/Basic/Targets/CSKY.cpp  |  6 +-
 clang/lib/Basic/Targets/CSKY.h|  4 +-
 clang/lib/Basic/Targets/DirectX.h |  4 +-
 clang/lib/Basic/Targets/Hexagon.h |  4 +-
 clang/lib/Basic/Targets/Lanai.h   |  4 +-
 clang/lib/Basic/Targets/Le64.h|  4 +-
 clang/lib/Basic/Targets/LoongArch.cpp |  3 +-
 clang/lib/Basic/Targets/LoongArch.h   |  4 +-
 clang/lib/Basic/Targets/M68k.cpp  | 30 
 clang/lib/Basic/Targets/M68k.h|  4 +-
 clang/lib/Basic/Targets/MSP430.h  |  4 +-
 clang/lib/Basic/Targets/Mips.h|  4 +-
 clang/lib/Basic/Targets/NVPTX.h   |  4 +-
 clang/lib/Basic/Targets/PNaCl.h   |  4 +-
 clang/lib/Basic/Targets/PPC.h |  4 +-
 clang/lib/Basic/Targets/RISCV.cpp |  6 +-
 clang/lib/Basic/Targets/RISCV.h   |  4 +-
 clang/lib/Basic/Targets/SPIR.cpp  |  5 +-
 clang/lib/Basic/Targets/SPIR.h|  8 ++-
 clang/lib/Basic/Targets/Sparc.h   |  6 +-
 clang/lib/Basic/Targets/SystemZ.cpp   |  6 +-
 clang/lib/Basic/Targets/SystemZ.h |  4 +-
 clang/lib/Basic/Targets/TCE.h |  4 +-
 clang/lib/Basic/Targets/VE.h  |  4 +-
 clang/lib/Basic/Targets/WebAssembly.h |  4 +-
 clang/lib/Basic/Targets/X86.cpp   |  6 +-
 clang/lib/Basic/Targets/X86.h |  4 +-
 clang/lib/Basic/Targets/XCore.h   |  4 +-
 clang/lib/CodeGen/CGStmt.cpp  | 21 --
 clang/lib/Sema/SemaStmtAsm.cpp| 18 ++---
 clang/test/Sema/asm.c | 72 ++-
 43 files changed, 287 insertions(+), 134 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td 
b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index de758cbe679dc..d4b0862337165 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -309,6 +309,39 @@ def err_asm_invalid_type : Error<
 def err_ms_asm_bitfield_unsupported : Error<
   "an inline asm block cannot have an operand which is a bit-field">;
 
+def asm_invalid_constraint_generic : TextSubstitution<
+  "invalid %select{input|output}0 constraint '%1' in asm">;
+def err_asm_invalid_constraint : Error<
+  "%sub{asm_invalid_constraint_generic}0,1">;
+def err_asm_invalid_constraint_start : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: output constraint must start with"
+  " '=' or '+'">;
+def err_asm_invalid_constraint_rw_clobber : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: early clobber with a read-write"
+  " constraint must be a register">;
+def err_asm_invalid_constraint_mem_or_reg : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: constraint must allow either"
+  " memory or register operands">;
+def err_asm_invalid_constraint_missing_bracket : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: missing ']'">;
+def err_asm_invalid_constraint_wrong_symbol : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: cannot find an output constraint"
+  " with the specified name">;
+def err_asm_invalid_constraint_empty : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: empty constraint has been"
+  " provided">;
+def err_asm_invalid_constraint_oob : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: the index is out of bounds">;
+def err_asm_invalid_constraint_missing : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: references to a non-existing 
output"
+  " constraint">;
+def err_asm_invalid_constraint_wrongly_tied : Error<
+  "%sub{asm_invalid_constraint_generic}0,1: tied constraint must be tied to"
+  " the same operand referenced to by the number">;
+def err_asm_invalid_constra

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-26 Thread Evgenii Kudriashov via cfe-commits

e-kud wrote:

> I think the specific checks clang is doing here have to be part of clang: in 
> particular, clang needs to translate from gcc syntax to LLVM IR asm syntax, 
> and that requires parsing the constraints. So these checks are necessary, and 
> emitting better diagnostics for checks we need to do anyway seems fine.

Yes. But constraints are checked. The problem here is that they may conflict 
with features. Should this "features+constraint" combination be checked in the 
frontend or in the backend?

I like the idea of checking it in the frontend because we may point to the 
specific constraint. In the backend we can point only to the whole expression 
or even emit a generic error without a line.

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Improve diagnostics for constraints of inline asm (NFC) (PR #96363)

2024-06-28 Thread Evgenii Kudriashov via cfe-commits

e-kud wrote:

> What exactly does it mean for a constraint to conflict with a feature? The 
> only thing I can think of is if it somehow involves a register class that 
> doesn't exist on the target with the current set of target features. I guess 
> we could try to diagnose that, but I'm not sure it's worth duplicating that 
> code.

Yes, indeed. For instance, we have a constraint `x` that must provide an `xmm` 
register. But if we compile with `-mno-sse` there are no `xmm`s available. And 
these changes in targets are the prework to handle these cases in clang. This 
is not a special case for X86. ARM, for example, makes some target constraints 
invalid if a feature is not enabled:
https://github.com/llvm/llvm-project/blob/a9c12e481bfef5b2913e2241486f4dd450188cd2/clang/lib/Basic/Targets/ARM.cpp#L1157-L1161

However, this implementation is buggy because it doesn't consider function 
attributes. In the case of multiversioning in the same TU we will hit 
compilation errors.

>  I'm not sure it's worth duplicating that code.

So, should we then return to the idea of better diagnosing these things in the backends?

https://github.com/llvm/llvm-project/pull/96363
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Headers][X86] amxintrin.h - fix attributes according to Intel SDM (PR #122204)

2025-01-08 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud created 
https://github.com/llvm/llvm-project/pull/122204

`tileloadd`, `tileloaddt1` and `tilestored` are part of the `amx-tile` feature.

The problem is observed when the `__tile_loadd` intrinsic is invoked: 
`_tile_loadd_internal`, which requires `amx-int8`, is inlined into 
`__tile_loadd`, which has only `amx-tile`.

>From c4b07dd5c89ec97a59a3f30edaadbe50422bf87e Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Wed, 8 Jan 2025 17:15:29 -0800
Subject: [PATCH] [Headers][X86] amxintrin.h - fix attributes according to SDM

tileloadd, tileloaddt1 and tilestored are part of amx-tile feature.
---
 clang/lib/Headers/amxintrin.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index b0140615677f27..a7da10d9951e7e 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -234,7 +234,7 @@ typedef int _tile1024i_1024a
 __attribute__((__vector_size__(1024), __aligned__(1024)));
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
 _tile_loadd_internal(unsigned short m, unsigned short n, const void *base,
  __SIZE_TYPE__ stride) {
   return __builtin_ia32_tileloadd64_internal(m, n, base,
@@ -242,7 +242,7 @@ _tile_loadd_internal(unsigned short m, unsigned short n, 
const void *base,
 }
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
 _tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
   return __builtin_ia32_tileloaddt164_internal(m, n, base,
@@ -278,7 +278,7 @@ _tile_dpbuud_internal(unsigned short m, unsigned short n, 
unsigned short k,
 }
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ void __DEFAULT_FN_ATTRS_INT8
+static __inline__ void __DEFAULT_FN_ATTRS_TILE
 _tile_stored_internal(unsigned short m, unsigned short n, void *base,
   __SIZE_TYPE__ stride, _tile1024i tile) {
   return __builtin_ia32_tilestored64_internal(m, n, base,

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Headers][X86] amxintrin.h - fix attributes according to Intel SDM (PR #122204)

2025-01-09 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud updated 
https://github.com/llvm/llvm-project/pull/122204

>From c4b07dd5c89ec97a59a3f30edaadbe50422bf87e Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Wed, 8 Jan 2025 17:15:29 -0800
Subject: [PATCH 1/2] [Headers][X86] amxintrin.h - fix attributes according to
 SDM

tileloadd, tileloaddt1 and tilestored are part of amx-tile feature.
---
 clang/lib/Headers/amxintrin.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index b0140615677f27..a7da10d9951e7e 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -234,7 +234,7 @@ typedef int _tile1024i_1024a
 __attribute__((__vector_size__(1024), __aligned__(1024)));
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
 _tile_loadd_internal(unsigned short m, unsigned short n, const void *base,
  __SIZE_TYPE__ stride) {
   return __builtin_ia32_tileloadd64_internal(m, n, base,
@@ -242,7 +242,7 @@ _tile_loadd_internal(unsigned short m, unsigned short n, 
const void *base,
 }
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE
 _tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base,
__SIZE_TYPE__ stride) {
   return __builtin_ia32_tileloaddt164_internal(m, n, base,
@@ -278,7 +278,7 @@ _tile_dpbuud_internal(unsigned short m, unsigned short n, 
unsigned short k,
 }
 
 /// This is internal intrinsic. C/C++ user should avoid calling it directly.
-static __inline__ void __DEFAULT_FN_ATTRS_INT8
+static __inline__ void __DEFAULT_FN_ATTRS_TILE
 _tile_stored_internal(unsigned short m, unsigned short n, void *base,
   __SIZE_TYPE__ stride, _tile1024i tile) {
   return __builtin_ia32_tilestored64_internal(m, n, base,

>From 2eadd5751278e4426ae9c2f16a176674d22b4654 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Thu, 9 Jan 2025 16:57:09 -0800
Subject: [PATCH 2/2] Split the amx_api.c to create a regression test

---
 clang/test/CodeGen/X86/amx_api.c  | 30 -
 clang/test/CodeGen/X86/amx_tile.c | 37 +++
 2 files changed, 37 insertions(+), 30 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/amx_tile.c

diff --git a/clang/test/CodeGen/X86/amx_api.c b/clang/test/CodeGen/X86/amx_api.c
index 5b6d50da27c6de..d770c03eb06d21 100644
--- a/clang/test/CodeGen/X86/amx_api.c
+++ b/clang/test/CodeGen/X86/amx_api.c
@@ -33,22 +33,6 @@ void test_api(int cond, short row, short col) {
   __tile_stored(buf, STRIDE, c);
 }
 
-void test_tile_loadd(short row, short col) {
-  //CHECK-LABEL: @test_tile_loadd
-  //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx 
{{%.*}})
-  __tile1024i a = {row, col};
-  __tile_loadd(&a, buf, STRIDE);
-}
-
-void test_tile_stream_loadd(short row, short col) {
-  //CHECK-LABEL: @test_tile_stream_loadd
-  //CHECK-DAG: call x86_amx @llvm.x86.tileloaddt164.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx 
{{%.*}})
-  __tile1024i a = {row, col};
-  __tile_stream_loadd(&a, buf, STRIDE);
-}
-
 void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
   //CHECK-LABEL: @test_tile_dpbssd
   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> 
{{%.*}})
@@ -81,20 +65,6 @@ void test_tile_dpbuud(__tile1024i a, __tile1024i b, 
__tile1024i c) {
   __tile_dpbuud(&c, a, b);
 }
 
-void test_tile_stored(__tile1024i c) {
-  //CHECK-LABEL: @test_tile_stored
-  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> 
{{%.*}})
-  //CHECK-DAG: call void @llvm.x86.tilestored64.internal
-  __tile_stored(buf, STRIDE, c);
-}
-
-void test_tile_zero(__tile1024i c) {
-  //CHECK-LABEL: @test_tile_zero
-  //CHECK-DAG: call x86_amx @llvm.x86.tilezero.internal
-  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx 
{{%.*}})
-  __tile_zero(&c);
-}
-
 void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
   //CHECK-LABEL: @test_tile_dpbf16ps
   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> 
{{%.*}})
diff --git a/clang/test/CodeGen/X86/amx_tile.c 
b/clang/test/CodeGen/X86/amx_tile.c
new file mode 100644
index 00..1c87ae5ba1eaa7
--- /dev/null
+++ b/clang/test/CodeGen/X86/amx_tile.c
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding 
-triple=x86_64-unknown-unknown  -target-feature +amx-tile  \
+// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
+
+#include 
+
+char buf[1024];
+#define STRIDE 32
+
+void test_tile_loadd(short ro

[clang] [llvm] [X86][AVX10.2-MINMAX][NFC] Remove NE[P] from intrinsic and instruction (PR #123272)

2025-01-20 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Couldn't grep anything `minmax`-like that hasn't been covered.

https://github.com/llvm/llvm-project/pull/123272
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2-BF16] Remove [NE]P from intrinsic and instruction name (PR #123335)

2025-01-22 Thread Evgenii Kudriashov via cfe-commits



@@ -5385,120 +5385,120 @@ let Features = "avx10.2-256", Attributes = [NoThrow, 
Const, RequiredVectorWidth<
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vcmppbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, 
_Vector<32, __bf16>, _Constant int, unsigned int)">;
+  def vcmpbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, 
_Vector<32, __bf16>, _Constant int, unsigned int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
-  def vcmppbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, 
_Vector<16, __bf16>, _Constant int, unsigned short)">;
+  def vcmpbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, 
_Vector<16, __bf16>, _Constant int, unsigned short)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vcmppbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, 
_Vector<8, __bf16>, _Constant int, unsigned char)">;
-  def vfpclasspbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, 
_Constant int, unsigned char)">;
+  def vcmpbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, 
_Vector<8, __bf16>, _Constant int, unsigned char)">;
+  def vfpclassbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, 
_Constant int, unsigned char)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
-  def vfpclasspbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, 
_Constant int, unsigned short)">;
+  def vfpclassbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, 
_Constant int, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vfpclasspbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, 
_Constant int, unsigned int)">;
+  def vfpclassbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, 
_Constant int, unsigned int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vscalefpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, 
__bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
+  def vscalefbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, 
_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
-  def vscalefpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, 
__bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
+  def vscalefbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, 
__bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vscalefpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, 
__bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
+  def vscalefbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, 
__bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vrcppbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, 
_Vector<8, __bf16>, unsigned char)">;
+  def vrcpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, 
_Vector<8, __bf16>, unsigned char)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
-  def vrcppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, 
_Vector<16, __bf16>, unsigned short)">;
+  def vrcpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, 
_Vector<16, __bf16>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vrcppbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, 
_Vector<32, __bf16>, unsigned int)">;
+  def vrcpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, 
_Vector<32, __bf16>, unsigned int)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vgetexppbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, 
__bf16>, _Vector<8, __bf16>, unsigned char)">;
+  def vgetexpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, 
_Vector<8, __bf16>, unsigned char)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
-  def vgetexppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, 
__bf16>, _Vector<16, __bf16>, unsigned short)">;
+  def vgetexpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, 
__bf16>, _Vector<16, __bf16>, unsigned short)">;
 }
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
-  def vgetexppbf

[clang] [llvm] [X86][AVX10.2-BF16] Remove [NE]P from intrinsic and instruction name (PR #123335)

2025-01-23 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. However, some clang-format changes for 
`llvm/lib/Target/X86/X86InstrFMA3Info.cpp` look reasonable to me.

https://github.com/llvm/llvm-project/pull/123335
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2-BF16] Update VCOMISBF16 intrinsics and instructions (PR #123307)

2025-01-23 Thread Evgenii Kudriashov via cfe-commits



@@ -5376,12 +5376,12 @@ let Features = "avx10.2-512", Attributes = [NoThrow, 
Const, RequiredVectorWidth<
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vcomsbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, 
__bf16>)">;
+  def vcomisbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;

e-kud wrote:

I like it, thanks!

https://github.com/llvm/llvm-project/pull/123307
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2-BF16] Update VCOMISBF16 intrinsics and instructions (PR #123307)

2025-01-20 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/123307
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2-BF16] Update VCOMISBF16 intrinsics and instructions (PR #123307)

2025-01-20 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/123307
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2-BF16] Update VCOMISBF16 intrinsics and instructions (PR #123307)

2025-01-20 Thread Evgenii Kudriashov via cfe-commits



@@ -5376,12 +5376,12 @@ let Features = "avx10.2-512", Attributes = [NoThrow, 
Const, RequiredVectorWidth<
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
-  def vcomsbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
-  def vcomsbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, 
__bf16>)">;
+  def vcomisbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vcomisbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">;

e-kud wrote:

Maybe sort them in some order?

https://github.com/llvm/llvm-project/pull/123307
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Disable m[no-]avx10.1 and switch m[no-]avx10.2 to alias of 512 bit options (PR #124511)

2025-01-28 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

I've been playing around and found that `-mavx10.2 -mno-avx10.2-512` enables 
`avx10.1-512` but `-mavx10.2-512 -mno-avx10.2-512` obviously doesn't. Does it 
make sense? It happens because when options match, they are eliminated before 
processing. But this is a problem not related to the PR.

https://github.com/llvm/llvm-project/pull/124511
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Disable m[no-]avx10.1 and switch m[no-]avx10.2 to alias of 512 bit options (PR #124511)

2025-01-28 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. I think completely disabling the AVX10 versions later than or equal to the 
one specified in `-mno` is clearer than implicitly disabling only the 512-bit 
instructions.

https://github.com/llvm/llvm-project/pull/124511
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10] Disable m[no-]avx10.1 and switch m[no-]avx10.2 to alias of 512 bit options (PR #124511)

2025-01-29 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

> Let me know whether you are happy with this solution or not.

Yes, thanks, I agree, this is better. Since we disable an AVX10 version completely 
with either of the options, 256 or 512 (some kind of duplication), it means that 
we can't partially disable it, so these options don't make much sense.

And some uncommon scenarios are also available, e.g., to benchmark a function 
compiled with `avx10.2-256` in an `avx10.2-512` context we need to use 
`__attribute__((target("no-avx10.1-512")))`. So basically `no-avx10.1-512` is 
an option to disable the 512-bit instructions of AVX10 regardless of the enabled 
AVX10.x version.


https://github.com/llvm/llvm-project/pull/124511
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Headers][X86] amxintrin.h - fix attributes according to Intel SDM (PR #122204)

2025-01-10 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud closed https://github.com/llvm/llvm-project/pull/122204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Headers][X86] amxintrin.h - fix attributes according to Intel SDM (PR #122204)

2025-01-08 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

I'm not sure if tests are needed. Let me know.

https://github.com/llvm/llvm-project/pull/122204
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10.2] Use 's_' for saturate-convert intrinsics (PR #131592)

2025-03-20 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/131592
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2] Remove YMM rounding from VCVT[,T]PS2I[,U]BS (PR #132426)

2025-03-21 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/132426
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2] Remove YMM rounding from VCVT[,T]PS2I[,U]BS (PR #132426)

2025-03-21 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/132426
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10] Remove VAES and VPCLMULQDQ feature from AVX10.1 (PR #135489)

2025-04-15 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. For the particular arches, `vaes`, `vpclmulqdq` are inherited from 
`ICLAdditionalFeatures`. So we shouldn't miss them in the future.

https://github.com/llvm/llvm-project/pull/135489
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][CodeGen] - Use shift operators for const value shifts, instead of built-ins for SSE emulation of MMX intrinsics. (PR #129197)

2025-02-28 Thread Evgenii Kudriashov via cfe-commits



@@ -1115,11 +1115,11 @@ _mm_srl_si64(__m64 __m, __m64 __count)
 /// \param __count
 ///A 32-bit integer value.
 /// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_si64(__m64 __m, int __count)
-{
-return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
-  __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m,
+  int __count) {
+  if (__builtin_constant_p(__count))
+return (__m64)((__count > 63) ? 0 : ((long long)__m >> __count));
+  return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), 
__count));

e-kud wrote:

I'd like to note that we change the behavior for negative shifts. Before this 
change we returned zero; now we return the value as is, because shifting by a 
negative count is UB. The intrinsic description doesn't specify what should 
happen in the case of a negative `__count`.

https://github.com/llvm/llvm-project/pull/129197
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][CodeGen] - Use shift operators for const value shifts, instead of built-ins for SSE emulation of MMX intrinsics. (PR #129197)

2025-02-28 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/129197
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI (PR #134240)

2025-04-03 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Looks like the last nep.

https://github.com/llvm/llvm-project/pull/134240
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86] Remove CLDEMOTE from Alderlake and later hybrid processors (PR #144662)

2025-06-18 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/144662
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86] [clang] Add missing check line for diamondrapids (NFC) (PR #145542)

2025-06-24 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud edited https://github.com/llvm/llvm-project/pull/145542
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86] [clang] Add missing check line for diamondrapids (NFC) (PR #145542)

2025-06-24 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/145542
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [X86] [clang] Add missing check line for diamondrapids (NFC) (PR #145542)

2025-06-26 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud closed https://github.com/llvm/llvm-project/pull/145542
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2 (PR #148633)

2025-07-14 Thread Evgenii Kudriashov via cfe-commits


e-kud wrote:

TBH I'm not sure if we need to add `avx512f,evex512` to intrinsics attributes 
and/or to `.td` as well.

https://github.com/llvm/llvm-project/pull/148633
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2 (PR #148633)

2025-07-14 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud created 
https://github.com/llvm/llvm-project/pull/148633

According to AVX10.2 rev. 4:

> AMX-AVX512's explicit AVX10.2 sensitivity is removed and the instructions are 
> removed in favor of inclusion in the ISE/SDM. Users of AMX-AVX512 ISA should 
> follow enabling and checking rules for both AMX and Intel® AVX-512/AVX10.

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

We set `amx-avx512` as implying `amx-tile`, `avx512f` and `evex512`, while 
`avx512fp16` and `avx512bf16` need to be specified separately.

>From 97044a895e5df9ec591775589495c7f9f7e855a7 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Mon, 14 Jul 2025 06:21:27 -0700
Subject: [PATCH] [X86][AVX10.2] Decouple AMX-AVX512 from AVX10.2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to AVX10.2 rev. 4:

AMX-AVX512's explicit AVX10.2 sensitivity is removed and the
instructions are removed in favor of inclusion in the ISE/SDM. Users of
AMX-AVX512 ISA should follow enabling and checking rules for both AMX
and Intel® AVX-512/AVX10.

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

We set amx-avx512 as implying amx-tile, avx512f and evex512 when
avx512fp16 and avx512bf16 need to be specified separately.
---
 clang/include/clang/Basic/BuiltinsX86_64.td   | 20 +---
 clang/lib/Headers/amxavx512intrin.h   | 32 ---
 clang/test/CodeGen/X86/amx_avx512_api.c   |  8 +++--
 clang/test/CodeGen/X86/amxavx512-builtins.c   |  6 +++-
 llvm/lib/Target/X86/X86.td|  3 +-
 llvm/lib/Target/X86/X86InstrAMX.td| 14 
 llvm/lib/TargetParser/X86TargetParser.cpp |  2 +-
 .../CodeGen/X86/amx-across-func-tilemovrow.ll |  8 ++---
 .../test/CodeGen/X86/amx-avx512-intrinsics.ll | 18 +--
 .../CodeGen/X86/amx-tile-avx512-internals.ll  |  4 +--
 10 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86_64.td 
b/clang/include/clang/Basic/BuiltinsX86_64.td
index f2b35874e3876..fecaaed37a868 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.td
+++ b/clang/include/clang/Basic/BuiltinsX86_64.td
@@ -290,13 +290,19 @@ let Features = "amx-complex,amx-transpose", Attributes = 
[NoThrow] in {
   def tconjtfp16_internal : X86Builtin<"_Vector<256, int>(unsigned short, 
unsigned short, _Vector<256, int>)">;
 }
 
-let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+let Features = "amx-avx512", Attributes = [NoThrow] in {
   def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, 
unsigned short, _Vector<256, int>, unsigned int)">;
+  def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, 
unsigned short, _Vector<256, int>, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
   def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned 
short, unsigned short, _Vector<256, int>, unsigned int)">;
   def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned 
short, unsigned short, _Vector<256, int>, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
   def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned 
short, unsigned short, _Vector<256, int>, unsigned int)">;
   def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned 
short, unsigned short, _Vector<256, int>, unsigned int)">;
-  def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, 
unsigned short, _Vector<256, int>, unsigned int)">;
 }
 
 let Features = "amx-tf32", Attributes = [NoThrow] in {
@@ -382,13 +388,19 @@ let Features = "amx-complex,amx-transpose", Attributes = 
[NoThrow] in {
   def tconjtfp16 : X86Builtin<"void(_Constant unsigned char, _Constant 
unsigned char)">;
 }
 
-let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
+let Features = "amx-avx512", Attributes = [NoThrow] in {
   def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, 
unsigned int)">;
+  def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, 
unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512bf16", Attributes = [NoThrow] in {
   def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned 
char, unsigned int)">;
   def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned 
char, unsigned int)">;
+}
+
+let Features = "amx-avx512,avx512fp16", Attributes = [NoThrow] in {
   def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned 
char, unsigned int)">;
   def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned 
char, unsigned int)">;
-  def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, 
unsigned int)">;
 }
 
 let Features = "amx-fp16", Attributes = [NoThrow] in {
diff --git a/clang/lib/Headers/amxavx512intrin.h 
b/clang/lib/Headers/amxavx512intrin.h
index bbde44fc265b3..e6c58e5c138a1 100644
---

[clang] [llvm] [X86] Remove WIDEKL feature from Pantherlake and Clearwaterforest (PR #148184)

2025-07-11 Thread Evgenii Kudriashov via cfe-commits


https://github.com/e-kud approved this pull request.

LGTM. Thanks!

https://github.com/llvm/llvm-project/pull/148184
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

67 matches

Mail list logo