llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Freddy Ye (FreddyLeaf) <details> <summary>Changes</summary> Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368 --- Patch is 59.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113402.diff 16 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsX86.def (+4) - (modified) clang/lib/Headers/CMakeLists.txt (+1) - (modified) clang/lib/Headers/immintrin.h (+3) - (added) clang/lib/Headers/sm4evexintrin.h (+32) - (added) clang/test/CodeGen/X86/sm4-evex-builtins.c (+19) - (modified) llvm/docs/ReleaseNotes.md (+2) - (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+6) - (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+20) - (added) llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll (+64) - (added) llvm/test/MC/Disassembler/X86/sm4-evex-32.txt (+170) - (added) llvm/test/MC/Disassembler/X86/sm4-evex-64.txt (+170) - (added) llvm/test/MC/X86/sm4-evex-32-att.s (+224) - (added) llvm/test/MC/X86/sm4-evex-32-intel.s (+169) - (added) llvm/test/MC/X86/sm4-evex-64-att.s (+169) - (added) llvm/test/MC/X86/sm4-evex-64-intel.s (+169) - (modified) llvm/test/TableGen/x86-fold-tables.inc (+6) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 4c6b22cca421ca..4486eb73a11fa6 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") +// SM4_EVEX +TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") +TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") + // AVX10 MINMAX TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index ff392e7122a448..6a594dad0b67d2 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -241,6 +241,7 @@ set(x86_files shaintrin.h sm3intrin.h sm4intrin.h + sm4evexintrin.h smmintrin.h tbmintrin.h tmmintrin.h diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 3fbabffa98df20..1b83dd2162707c 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) { #include <avx10_2_512niintrin.h> #include <avx10_2_512satcvtdsintrin.h> #include <avx10_2_512satcvtintrin.h> +#if (defined(__SM4__)) +#include <sm4evexintrin.h> +#endif #endif #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h new file mode 100644 index 00000000000000..f6ae0037baea03 --- /dev/null +++ b/clang/lib/Headers/sm4evexintrin.h @@ -0,0 +1,32 @@ +/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __SM4EVEXINTRIN_H +#define __SM4EVEXINTRIN_H + +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sm4,avx10.2-512"), __min_vector_width__(512))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __SM4EVEXINTRIN_H diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c new file mode 100644 index 00000000000000..0e54bd008d4fb0 --- /dev/null +++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include <immintrin.h> +#include <stddef.h> + +__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4key4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4key4_epi32(__A, __B); +} + +__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4rnds4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4rnds4_epi32(__A, __B); +} diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index e5853789c78b63..16764210537689 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -196,6 +196,8 @@ Changes to the X86 Backend * Support ISA of `AVX10.2-256` and `AVX10.2-512`. +* Support ISA of `SM4(EVEX)`. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5262e3154ff721..7725bda1f4f598 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in { DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; +def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 625f2e01d47218..640011f5ed28d7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; } + +// SM4(EVEX) +multiclass avx10_sm4_base<string OpStr> { + // SM4_Base is in X86InstrSSE.td. + let Predicates = [HasSM4, HasAVX10_2] in { + defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC, + "128", avx512vl_i32_info.info128.LdFrag, + avx512vl_i32_info.info128.MemOp>, EVEX_V128; + defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC, + "256", avx512vl_i32_info.info256.LdFrag, + avx512vl_i32_info.info256.MemOp>, EVEX_V256; + } + let Predicates = [HasSM4, HasAVX10_2_512] in + defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC, + "512", avx512vl_i32_info.info512.LdFrag, + avx512vl_i32_info.info512.MemOp>, EVEX_V512; +} + +defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV; +defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV; diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll new file mode 100644 index 00000000000000..fc46d3cf23fd41 --- /dev/null +++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s + +define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B) + ret <8 x i32> %ret +} +declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + +define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + ret <8 x i32> %ret +} +declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt new file mode 100644 index 00000000000000..f89f4b5a8c0fb8 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmm24 +0x62,0x82,0x46,0x40,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip] +0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x46,0x40,0xda,0x71,0x7f + +# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x46,0x40,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmm24 +0x62,0x82,0x47,0x40,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] +0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x47,0x40,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x47,0x40,0xda,0x72,0x80 + +# ATT: vsm4key4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymm24 +0x62,0x82,0x46,0x20,0xda,0xf0 + +# ATT: vsm4key4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmm24 +0x62,0x82,0x46,0x00,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip] +0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x46,0x20,0xda,0x71,0x7f + +# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x46,0x20,0xda,0x72,0x80 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x46,0x00,0xda,0x71,0x7f + +# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x46,0x00,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymm24 +0x62,0x82,0x47,0x20,0xda,0xf0 + +# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmm24 +0x62,0x82,0x47,0x00,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] +0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x47,0x20,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x47,0x20,0xda,0x72,0x80 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x47,0x00,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x47,0x00,0xda,0x72,0x80 diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt new file mode 100644 index 00000000000000..c1cb271a967b13 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmm4 +0x62,0xf2,0x66,0x48,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax] +0x62,0xf2,0x66,0x48,0xda,0x10 + +# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x66,0x48,0xda,0x51,0x7f + +# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x66,0x48,0xda,0x52,0x80 + +# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmm4 +0x62,0xf2,0x67,0x48,0xda,0xd4 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] +0x62,0xf2,0x67,0x48,0xda,0x10 + +# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x67,0x48,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x67,0x48,0xda,0x52,0x80 + +# ATT: vsm4key4 %ymm4, %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymm4 +0x62,0xf2,0x66,0x28,0xda,0xd4 + +# ATT: vsm4key4 %xmm4, %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmm4 +0x62,0xf2,0x66,0x08,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/113402 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits