https://github.com/FreddyLeaf created https://github.com/llvm/llvm-project/pull/113402
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368 From 83a2ef421831dec1790c3c6adf3141ed1ac9a0d5 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Mon, 21 Oct 2024 09:30:26 +0800 Subject: [PATCH] [X86] Support SM4 EVEX version intrinsics/instructions. Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368 --- clang/include/clang/Basic/BuiltinsX86.def | 4 + clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/immintrin.h | 3 + clang/lib/Headers/sm4evexintrin.h | 32 +++ clang/test/CodeGen/X86/sm4-evex-builtins.c | 19 ++ llvm/docs/ReleaseNotes.md | 2 + llvm/include/llvm/IR/IntrinsicsX86.td | 6 + llvm/lib/Target/X86/X86InstrAVX10.td | 20 ++ llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll | 64 +++++ llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 170 +++++++++++++ llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 170 +++++++++++++ llvm/test/MC/X86/sm4-evex-32-att.s | 224 ++++++++++++++++++ llvm/test/MC/X86/sm4-evex-32-intel.s | 169 +++++++++++++ llvm/test/MC/X86/sm4-evex-64-att.s | 169 +++++++++++++ llvm/test/MC/X86/sm4-evex-64-intel.s | 169 +++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 6 + 16 files changed, 1228 insertions(+) create mode 100644 clang/lib/Headers/sm4evexintrin.h create mode 100644 clang/test/CodeGen/X86/sm4-evex-builtins.c create mode 100644 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt create mode 100644 llvm/test/MC/X86/sm4-evex-32-att.s create mode 100644 llvm/test/MC/X86/sm4-evex-32-intel.s create mode 100644 llvm/test/MC/X86/sm4-evex-64-att.s create mode 100644 llvm/test/MC/X86/sm4-evex-64-intel.s diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 4c6b22cca421ca..4486eb73a11fa6 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2179,6 +2179,10 @@ 
TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") +// SM4_EVEX +TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") +TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") + // AVX10 MINMAX TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index ff392e7122a448..6a594dad0b67d2 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -241,6 +241,7 @@ set(x86_files shaintrin.h sm3intrin.h sm4intrin.h + sm4evexintrin.h smmintrin.h tbmintrin.h tmmintrin.h diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 3fbabffa98df20..1b83dd2162707c 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) { #include <avx10_2_512niintrin.h> #include <avx10_2_512satcvtdsintrin.h> #include <avx10_2_512satcvtintrin.h> +#if (defined(__SM4__)) +#include <sm4evexintrin.h> +#endif #endif #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h new file mode 100644 index 00000000000000..f6ae0037baea03 --- /dev/null +++ b/clang/lib/Headers/sm4evexintrin.h @@ -0,0 +1,32 @@ +/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __SM4EVEXINTRIN_H +#define __SM4EVEXINTRIN_H + +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sm4,avx10.2-512"), __min_vector_width__(512))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __SM4EVEXINTRIN_H diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c new file mode 100644 index 00000000000000..0e54bd008d4fb0 --- /dev/null +++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> +#include <stddef.h> + +__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4key4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4key4_epi32(__A, __B); +} + +__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4rnds4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4rnds4_epi32(__A, __B); +} 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index e5853789c78b63..16764210537689 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -196,6 +196,8 @@ Changes to the X86 Backend * Support ISA of `AVX10.2-256` and `AVX10.2-512`. +* Support ISA of `SM4(EVEX)`. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5262e3154ff721..7725bda1f4f598 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in { DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; +def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 625f2e01d47218..640011f5ed28d7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; } + +// SM4(EVEX) +multiclass avx10_sm4_base<string OpStr> { + // SM4_Base is in X86InstrSSE.td. 
+ let Predicates = [HasSM4, HasAVX10_2] in { + defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC, + "128", avx512vl_i32_info.info128.LdFrag, + avx512vl_i32_info.info128.MemOp>, EVEX_V128; + defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC, + "256", avx512vl_i32_info.info256.LdFrag, + avx512vl_i32_info.info256.MemOp>, EVEX_V256; + } + let Predicates = [HasSM4, HasAVX10_2_512] in + defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC, + "512", avx512vl_i32_info.info512.LdFrag, + avx512vl_i32_info.info512.MemOp>, EVEX_V512; +} + +defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV; +defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV; diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll new file mode 100644 index 00000000000000..fc46d3cf23fd41 --- /dev/null +++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s + +define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x 
i32> %B) + ret <8 x i32> %ret +} +declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + +define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + ret <8 x i32> %ret +} +declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt new 
file mode 100644 index 00000000000000..f89f4b5a8c0fb8 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmm24 +0x62,0x82,0x46,0x40,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip] +0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x46,0x40,0xda,0x71,0x7f + +# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x46,0x40,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmm24 +0x62,0x82,0x47,0x40,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] 
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x47,0x40,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x47,0x40,0xda,0x72,0x80 + +# ATT: vsm4key4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymm24 +0x62,0x82,0x46,0x20,0xda,0xf0 + +# ATT: vsm4key4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmm24 +0x62,0x82,0x46,0x00,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip] +0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x46,0x20,0xda,0x71,0x7f + +# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x46,0x20,0xda,0x72,0x80 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] 
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x46,0x00,0xda,0x71,0x7f + +# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x46,0x00,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymm24 +0x62,0x82,0x47,0x20,0xda,0xf0 + +# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmm24 +0x62,0x82,0x47,0x00,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] +0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x47,0x20,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x47,0x20,0xda,0x72,0x80 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] 
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x47,0x00,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x47,0x00,0xda,0x72,0x80 diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt new file mode 100644 index 00000000000000..c1cb271a967b13 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmm4 +0x62,0xf2,0x66,0x48,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax] +0x62,0xf2,0x66,0x48,0xda,0x10 + +# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] 
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x66,0x48,0xda,0x51,0x7f + +# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x66,0x48,0xda,0x52,0x80 + +# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmm4 +0x62,0xf2,0x67,0x48,0xda,0xd4 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] +0x62,0xf2,0x67,0x48,0xda,0x10 + +# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x67,0x48,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x67,0x48,0xda,0x52,0x80 + +# ATT: vsm4key4 %ymm4, %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymm4 +0x62,0xf2,0x66,0x28,0xda,0xd4 + +# ATT: vsm4key4 %xmm4, %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmm4 +0x62,0xf2,0x66,0x08,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr 
[eax] +0x62,0xf2,0x66,0x28,0xda,0x10 + +# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf2,0x66,0x28,0xda,0x51,0x7f + +# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096] +0x62,0xf2,0x66,0x28,0xda,0x52,0x80 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [eax] +0x62,0xf2,0x66,0x08,0xda,0x10 + +# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf2,0x66,0x08,0xda,0x51,0x7f + +# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] +0x62,0xf2,0x66,0x08,0xda,0x52,0x80 + +# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymm4 +0x62,0xf2,0x67,0x28,0xda,0xd4 + +# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmm4 +0x62,0xf2,0x67,0x08,0xda,0xd4 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2 +# INTEL: 
vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] +0x62,0xf2,0x67,0x28,0xda,0x10 + +# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf2,0x67,0x28,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] +0x62,0xf2,0x67,0x28,0xda,0x52,0x80 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] +0x62,0xf2,0x67,0x08,0xda,0x10 + +# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf2,0x67,0x08,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] +0x62,0xf2,0x67,0x08,0xda,0x52,0x80 diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s new file mode 100644 index 00000000000000..389a29b1189795 --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-32-att.s @@ -0,0 +1,224 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0] + vsm4key4 %zmm24, %zmm23, %zmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + 
vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 + +// CHECK: vsm4key4 (%rip), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %zmm23, %zmm22 + +// CHECK: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vsm4key4 8128(%rcx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f] + vsm4key4 8128(%rcx), %zmm23, %zmm22 + +// CHECK: vsm4key4 -8192(%rdx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80] + vsm4key4 -8192(%rdx), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0] + vsm4rnds4 %zmm24, %zmm23, %zmm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 (%rip), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 8128(%rcx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f] + vsm4rnds4 8128(%rcx), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80] + vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 + +// CHECK: vsm4key4 %ymm24, 
%ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 + +// CHECK: vsm4key4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4key4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %xmm23, %xmm22 + +// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: 
[0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 + +// CHECK: vsm4key4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4key4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %xmm23, %xmm22 + +// CHECK: 
vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 -2048(%rdx), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0] + vsm4rnds4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4rnds4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0] + vsm4rnds4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f] + vsm4rnds4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80] + vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: 
[0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f] + vsm4rnds4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80] + vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s new file mode 100644 index 00000000000000..3cc18cf4178ed8 --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-32-intel.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0] + vsm4key4 zmm22, zmm23, zmm24 + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 zmm22, zmm23, zmmword ptr [rip] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vsm4key4 
zmm22, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f] + vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80] + vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0] + vsm4rnds4 zmm22, zmm23, zmm24 + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] + +// CHECK: vsm4key4 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 ymm22, ymm23, ymm24 + +// CHECK: vsm4key4 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 xmm22, xmm23, xmm24 + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 ymm22, 
ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 ymm22, ymm23, ymmword ptr [rip] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 xmm22, xmm23, xmmword ptr [rip] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] +// CHECK: encoding: 
[0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0] + vsm4rnds4 ymm22, ymm23, ymm24 + +// CHECK: vsm4rnds4 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0] + vsm4rnds4 xmm22, xmm23, xmm24 + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] +// CHECK: encoding: 
[0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s new file mode 100644 index 00000000000000..de10d95ac74d7b --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-64-att.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4] + vsm4key4 %zmm4, %zmm3, %zmm2 + +// CHECK: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 + +// CHECK: vsm4key4 (%eax), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10] + vsm4key4 (%eax), %zmm3, %zmm2 + +// CHECK: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vsm4key4 8128(%ecx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f] + vsm4key4 8128(%ecx), %zmm3, %zmm2 + +// CHECK: vsm4key4 -8192(%edx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80] + vsm4key4 -8192(%edx), %zmm3, %zmm2 + +// 
CHECK: vsm4rnds4 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4] + vsm4rnds4 %zmm4, %zmm3, %zmm2 + +// CHECK: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 (%eax), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10] + vsm4rnds4 (%eax), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 8128(%ecx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f] + vsm4rnds4 8128(%ecx), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 -8192(%edx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80] + vsm4rnds4 -8192(%edx), %zmm3, %zmm2 + +// CHECK: {evex} vsm4key4 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4] + {evex} vsm4key4 %ymm4, %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4] + {evex} vsm4key4 %xmm4, %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 (%eax), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10] + {evex} vsm4key4 (%eax), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: 
[0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f] + {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80] + {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 (%eax), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10] + {evex} vsm4key4 (%eax), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f] + {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80] + {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4] + {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4] + {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 +// CHECK: 
encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 (%eax), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10] + {evex} vsm4rnds4 (%eax), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f] + {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80] + {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 (%eax), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10] + {evex} vsm4rnds4 (%eax), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f] + {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80] + {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2 diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s new file mode 100644 index 00000000000000..812fdb13f80913 --- /dev/null +++
b/llvm/test/MC/X86/sm4-evex-64-intel.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4] + vsm4key4 zmm2, zmm3, zmm4 + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10] + vsm4key4 zmm2, zmm3, zmmword ptr [eax] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f] + vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80] + vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4] + vsm4rnds4 zmm2, zmm3, zmm4 + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] +// CHECK: encoding: 
[0x62,0xf2,0x67,0x48,0xda,0x10] + vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f] + vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80] + vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4] + {evex} vsm4key4 ymm2, ymm3, ymm4 + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4] + {evex} vsm4key4 xmm2, xmm3, xmm4 + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80] + {evex} vsm4key4 ymm2, 
ymm3, ymmword ptr [edx - 4096] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4] + {evex} vsm4rnds4 ymm2, ymm3, ymm4 + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4] + {evex} vsm4rnds4 xmm2, xmm3, xmm4 + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] +// CHECK: encoding: 
[0x62,0xf2,0x67,0x28,0xda,0x10] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 85d9b02ac0cbf1..43c206fa0af698 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc 
+++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -4113,8 +4113,14 @@ static const X86FoldTableEntry Table2[] = { {X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0}, {X86::VSHUFPSrri, X86::VSHUFPSrmi, 0}, {X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0}, + {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0}, + {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0}, + {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0}, {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0}, {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0}, + {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0}, + {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0}, + {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0}, {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0}, {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0}, {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0}, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits