Author: Mahesh-Attarde Date: 2024-09-18T21:01:51+08:00 New Revision: 311e4e3245818d42e2bd148157c960f567f37096
URL: https://github.com/llvm/llvm-project/commit/311e4e3245818d42e2bd148157c960f567f37096 DIFF: https://github.com/llvm/llvm-project/commit/311e4e3245818d42e2bd148157c960f567f37096.diff LOG: [X86][AVX10.2] Support AVX10.2 MOVZXC new Instructions. (#108537) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS --------- Co-authored-by: mattarde <matta...@intel.com> Added: clang/lib/Headers/avx10_2copyintrin.h clang/test/CodeGen/X86/avx512copy-builtins.c llvm/test/CodeGen/X86/avx512copy-intrinsics.ll llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt llvm/test/MC/X86/avx10.2-copy-32-att.s llvm/test/MC/X86/avx10.2-copy-32-intel.s llvm/test/MC/X86/avx10.2-copy-64-att.s llvm/test/MC/X86/avx10.2-copy-64-intel.s Modified: clang/lib/Headers/CMakeLists.txt clang/lib/Headers/immintrin.h llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86InstrAVX10.td llvm/test/TableGen/x86-fold-tables.inc llvm/utils/TableGen/X86ManualInstrMapping.def Removed: ################################################################################ diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 4c75c638b41bae..f5cc07c303f9eb 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -156,6 +156,7 @@ set(x86_files avx10_2_512satcvtintrin.h avx10_2bf16intrin.h avx10_2convertintrin.h + avx10_2copyintrin.h avx10_2minmaxintrin.h avx10_2niintrin.h avx10_2satcvtdsintrin.h diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h new file mode 100644 index 00000000000000..7fc31190781d91 --- /dev/null +++ b/clang/lib/Headers/avx10_2copyintrin.h @@ -0,0 +1,34 @@ +/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2COPYINTRIN_H +#define __AVX10_2COPYINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8); +} + +#undef __DEFAULT_FN_ATTRS128 + +#endif // __AVX10_2COPYINTRIN_H diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 280154f3c1026e..3fbabffa98df20 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) { #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) #include <avx10_2bf16intrin.h> #include <avx10_2convertintrin.h> +#include <avx10_2copyintrin.h> #include <avx10_2minmaxintrin.h> #include <avx10_2niintrin.h> #include <avx10_2satcvtdsintrin.h> diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c new file mode 100644 index 00000000000000..06f7507bde53ed --- /dev/null +++ b/clang/test/CodeGen/X86/avx512copy-builtins.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \ +// RUN: -emit-llvm -o - -Wall 
-Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s + +#include <immintrin.h> +#include <stddef.h> + +__m128i test_mm_move_epi32(__m128i A) { + // CHECK-LABEL: test_mm_move_epi32 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + return _mm_move_epi32(A); +} + +__m128i test_mm_move_epi16(__m128i A) { + // CHECK-LABEL: test_mm_move_epi16 + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + return _mm_move_epi16(A); +} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 182f6c08366a99..68563f556ecfb4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12348,7 +12348,7 @@ static SDValue lowerShuffleAsElementInsertion( } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || - EltVT == MVT::i16) { + (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) { // Either not inserting from the low element of the input or the input // element size is too small to use VZEXT_MOVL to clear the high bits. 
return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index f0334109a32b68..625f2e01d47218 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1583,3 +1583,67 @@ let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { "vucomxss", SSEPackedSingle>, TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; } + +//------------------------------------------------- +// AVX10 MOVZXC (COPY) instructions +//------------------------------------------------- +let Predicates = [HasAVX10_2] in { + def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v4i32 (X86vzmovl + (v4i32 VR128X:$src))))]>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), + (ins i32mem:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<32, CD8VT1>, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}", + (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>; + +def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + [(set VR128X:$dst, (v8i16 (X86vzmovl + (v8i16 VR128X:$src))))]>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + +let isCodeGenOnly = 1, 
ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in + def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i16mem:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecLoad]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in + def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX, + EVEX_CD8<16, CD8VT1>, T_MAP5, + Sched<[WriteVecStore]>; + +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovw\t{$src, $dst|$dst, $src}", + []>, EVEX, T_MAP5, + Sched<[WriteVecMoveFromGpr]>; + def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}", + (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>; +} diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll new file mode 100644 index 00000000000000..a7ca23792e6feb --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102 +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC + +define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind { +; AVX102-LABEL: test_mm_move_epi32: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi32: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] +; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # 
encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4> + ret <4 x i32> %res +} + +define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind { +; AVX102-LABEL: test_mm_move_epi16: +; AVX102: # %bb.0: +; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0] +; AVX102-NEXT: retq # encoding: [0xc3] +; +; NOAVX512MOVZXC-LABEL: test_mm_move_epi16: +; NOAVX512MOVZXC: # %bb.0: +; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01] +; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] +; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3] + %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <8 x i16> %res +} diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt new file mode 100644 index 00000000000000..e86c2340a486c5 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%ecx), %xmm5 +# INTEL: vmovd xmm5, dword ptr [ecx] +0x62 0xf1 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm5, (%ecx) +# INTEL: vmovd dword ptr [ecx], xmm5 +0x62 0xf1 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: vmovd xmm1, xmm2 +0x62 0xf1 0x7e 0x08 0x7e 0xca + +# ATT: vmovd %xmm2, %xmm1 +# INTEL: vmovd xmm1, xmm2 +0x62 0xf1 0x7d 0x08 0xd6 0xca + +# ATT: vmovw %xmm5, (%ecx) +# INTEL: vmovw dword ptr [ecx], xmm5 +0x62 0xf5 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw 
(%ecx), %xmm5 +# INTEL: vmovw xmm5, word ptr [ecx] +0x62 0xf5 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x6e 0xca + +# ATT: vmovw %xmm2, %xmm1 +# INTEL: vmovw xmm1, xmm2 +0x62 0xf5 0x7e 0x08 0x7e 0xca diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt new file mode 100644 index 00000000000000..36ddd75a77ad39 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt @@ -0,0 +1,34 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vmovd (%rcx), %xmm29 +# INTEL: vmovd xmm29, dword ptr [rcx] +0x62 0x61 0x7e 0x08 0x7e 0x29 + +# ATT: vmovd %xmm29, (%rcx) +# INTEL: vmovd dword ptr [rcx], xmm29 +0x62 0x61 0x7d 0x08 0xd6 0x29 + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7e 0x08 0x7e 0xee + +# ATT: vmovd %xmm22, %xmm21 +# INTEL: vmovd xmm21, xmm22 +0x62 0xa1 0x7d 0x08 0xd6 0xee + +# ATT: vmovw %xmm29, (%rcx) +# INTEL: vmovw dword ptr [rcx], xmm29 +0x62 0x65 0x7e 0x08 0x7e 0x29 + +# ATT: vmovw (%rcx), %xmm29 +# INTEL: vmovw xmm29, word ptr [rcx] +0x62 0x65 0x7e 0x08 0x6e 0x29 + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x6e 0xee + +# ATT: vmovw %xmm22, %xmm21 +# INTEL: vmovw xmm21, xmm22 +0x62 0xa5 0x7e 0x08 0x7e 0xee diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s new file mode 100644 index 00000000000000..2bc498720849c9 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s @@ -0,0 +1,82 @@ +// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s + +// CHECK: vmovd 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vmovd 291(%edi,%eax,4), %xmm2 +// CHECK: 
encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd 291(%edi,%eax,4), %xmm2 + +// CHECK: vmovd (%eax), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd (%eax), %xmm2 + +// CHECK: vmovd -128(,%ebp,2), %xmm2 +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd -128(,%ebp,2), %xmm2 + +// CHECK: vmovd %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovd %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd %xmm3, 291(%edi,%eax,4) + +// CHECK: vmovd %xmm3, (%eax) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd %xmm3, (%eax) + +// CHECK: vmovd %xmm3, -128(,%ebp,2) +// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm3, -128(,%ebp,2) + +// CHECK: vmovw 268435456(%esp,%esi,8), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw 268435456(%esp,%esi,8), %xmm2 + +// CHECK: vmovw 291(%edi,%eax,4), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw 291(%edi,%eax,4), %xmm2 + +// CHECK: vmovw (%eax), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw (%eax), %xmm2 + +// CHECK: vmovw -64(,%ebp,2), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%ebp,2), %xmm2 + +// CHECK: vmovw 254(%ecx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw 254(%ecx), %xmm2 + +// CHECK: vmovw -256(%edx), %xmm2 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw -256(%edx), %xmm2 + +// CHECK: vmovw %xmm3, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw %xmm3, 268435456(%esp,%esi,8) + +// CHECK: vmovw %xmm3, 291(%edi,%eax,4) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw %xmm3, 291(%edi,%eax,4) + +// CHECK: 
vmovw %xmm3, (%eax) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw %xmm3, (%eax) + +// CHECK: vmovw %xmm3, -64(,%ebp,2) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm3, -64(,%ebp,2) + +// CHECK: vmovw %xmm3, 254(%ecx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw %xmm3, 254(%ecx) + +// CHECK: vmovw %xmm3, -256(%edx) +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw %xmm3, -256(%edx) + diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s new file mode 100644 index 00000000000000..aa84548e5f75dd --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s @@ -0,0 +1,81 @@ +// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovd xmm2, dword ptr [esp + 8*esi + 268435456] + +// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovd xmm2, dword ptr [edi + 4*eax + 291] + +// CHECK: vmovd xmm2, dword ptr [eax] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x10] + vmovd xmm2, dword ptr [eax] + +// CHECK: vmovd xmm2, dword ptr [2*ebp - 128] +// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm2, dword ptr [2*ebp - 128] + +// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovd dword ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovd dword ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovd dword ptr [eax], xmm3 +// CHECK: encoding: [0xc5,0xf9,0x7e,0x18] + vmovd dword ptr [eax], xmm3 + +// CHECK: vmovd dword ptr [2*ebp - 128], xmm3 +// CHECK: encoding: 
[0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*ebp - 128], xmm3 + +// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10] + vmovw xmm2, word ptr [esp + 8*esi + 268435456] + +// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00] + vmovw xmm2, word ptr [edi + 4*eax + 291] + +// CHECK: vmovw xmm2, word ptr [eax] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10] + vmovw xmm2, word ptr [eax] + +// CHECK: vmovw xmm2, word ptr [2*ebp - 64] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm2, word ptr [2*ebp - 64] + +// CHECK: vmovw xmm2, word ptr [ecx + 254] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f] + vmovw xmm2, word ptr [ecx + 254] + +// CHECK: vmovw xmm2, word ptr [edx - 256] +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80] + vmovw xmm2, word ptr [edx - 256] + +// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10] + vmovw word ptr [esp + 8*esi + 268435456], xmm3 + +// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00] + vmovw word ptr [edi + 4*eax + 291], xmm3 + +// CHECK: vmovw word ptr [eax], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18] + vmovw word ptr [eax], xmm3 + +// CHECK: vmovw word ptr [2*ebp - 64], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*ebp - 64], xmm3 + +// CHECK: vmovw word ptr [ecx + 254], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f] + vmovw word ptr [ecx + 254], xmm3 + +// CHECK: vmovw word ptr [edx - 256], xmm3 +// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80] + vmovw word ptr [edx - 256], xmm3 diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s 
b/llvm/test/MC/X86/avx10.2-copy-64-att.s new file mode 100644 index 00000000000000..a672b2d842240c --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s + +// CHECK: vmovd 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vmovd 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd 291(%r8,%rax,4), %xmm22 + +// CHECK: vmovd (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd (%rip), %xmm22 + +// CHECK: vmovd -128(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd -128(,%rbp,2), %xmm22 + +// CHECK: vmovd 508(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd 508(%rcx), %xmm22 + +// CHECK: vmovd -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd -512(%rdx), %xmm22 + +// CHECK: vmovd %xmm23, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovd %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovd %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd %xmm23, (%rip) + +// CHECK: vmovd %xmm23, -128(,%rbp,2) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd %xmm23, -128(,%rbp,2) + +// CHECK: vmovd %xmm23, 508(%rcx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd %xmm23, 508(%rcx) + +// CHECK: vmovd %xmm23, -512(%rdx) +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd %xmm23, -512(%rdx) + +// CHECK: vmovw 268435456(%rbp,%r14,8), %xmm22 +// CHECK: encoding: 
[0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw 268435456(%rbp,%r14,8), %xmm22 + +// CHECK: vmovw 291(%r8,%rax,4), %xmm22 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw 291(%r8,%rax,4), %xmm22 + +// CHECK: vmovw (%rip), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw (%rip), %xmm22 + +// CHECK: vmovw -64(,%rbp,2), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw -64(,%rbp,2), %xmm22 + +// CHECK: vmovw 254(%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw 254(%rcx), %xmm22 + +// CHECK: vmovw -256(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw -256(%rdx), %xmm22 + +// CHECK: vmovw %xmm23, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw %xmm23, 268435456(%rbp,%r14,8) + +// CHECK: vmovw %xmm23, 291(%r8,%rax,4) +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw %xmm23, 291(%r8,%rax,4) + +// CHECK: vmovw %xmm23, (%rip) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw %xmm23, (%rip) + +// CHECK: vmovw %xmm23, -64(,%rbp,2) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw %xmm23, -64(,%rbp,2) + +// CHECK: vmovw %xmm23, 254(%rcx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw %xmm23, 254(%rcx) + +// CHECK: vmovw %xmm23, -256(%rdx) +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw %xmm23, -256(%rdx) diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s new file mode 100644 index 00000000000000..4fd7b67dfa5db5 --- /dev/null +++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vmovd xmm22, dword ptr [rbp + 
8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovd xmm22, dword ptr [r8 + 4*rax + 291] + +// CHECK: vmovd xmm22, dword ptr [rip] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovd xmm22, dword ptr [rip] + +// CHECK: vmovd xmm22, dword ptr [2*rbp - 128] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff] + vmovd xmm22, dword ptr [2*rbp - 128] + +// CHECK: vmovd xmm22, dword ptr [rcx + 508] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f] + vmovd xmm22, dword ptr [rcx + 508] + +// CHECK: vmovd xmm22, dword ptr [rdx - 512] +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80] + vmovd xmm22, dword ptr [rdx - 512] + +// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovd dword ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovd dword ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovd dword ptr [rip], xmm23 + +// CHECK: vmovd dword ptr [2*rbp - 128], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff] + vmovd dword ptr [2*rbp - 128], xmm23 + +// CHECK: vmovd dword ptr [rcx + 508], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f] + vmovd dword ptr [rcx + 508], xmm23 + +// CHECK: vmovd dword ptr [rdx - 512], xmm23 +// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80] + vmovd dword ptr [rdx - 512], xmm23 + +// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: 
[0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10] + vmovw xmm22, word ptr [rbp + 8*r14 + 268435456] + +// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00] + vmovw xmm22, word ptr [r8 + 4*rax + 291] + +// CHECK: vmovw xmm22, word ptr [rip] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00] + vmovw xmm22, word ptr [rip] + +// CHECK: vmovw xmm22, word ptr [2*rbp - 64] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff] + vmovw xmm22, word ptr [2*rbp - 64] + +// CHECK: vmovw xmm22, word ptr [rcx + 254] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f] + vmovw xmm22, word ptr [rcx + 254] + +// CHECK: vmovw xmm22, word ptr [rdx - 256] +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80] + vmovw xmm22, word ptr [rdx - 256] + +// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 +// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10] + vmovw word ptr [rbp + 8*r14 + 268435456], xmm23 + +// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23 +// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00] + vmovw word ptr [r8 + 4*rax + 291], xmm23 + +// CHECK: vmovw word ptr [rip], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00] + vmovw word ptr [rip], xmm23 + +// CHECK: vmovw word ptr [2*rbp - 64], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff] + vmovw word ptr [2*rbp - 64], xmm23 + +// CHECK: vmovw word ptr [rcx + 254], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f] + vmovw word ptr [rcx + 254], xmm23 + +// CHECK: vmovw word ptr [rdx - 256], xmm23 +// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80] + vmovw word ptr [rdx - 256], xmm23 diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 94347839d281f9..85d9b02ac0cbf1 100644 --- 
a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1617,8 +1617,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0}, {X86::VMOVUPSrr, X86::VMOVUPSrm, 0}, {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE}, + {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE}, {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE}, + {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE}, {X86::VPABSBYrr, X86::VPABSBYrm, 0}, {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0}, {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0}, diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def index d76c404722b0ac..bc539d792f38df 100644 --- a/llvm/utils/TableGen/X86ManualInstrMapping.def +++ b/llvm/utils/TableGen/X86ManualInstrMapping.def @@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri) NOCOMP(VPSRAQZ128rm) NOCOMP(VPSRAQZ128rr) NOCOMP(VSCALEFPSZ128rm) +NOCOMP(VMOVZPDILo2PDIZrr) NOCOMP(VDBPSADBWZ256rmi) NOCOMP(VDBPSADBWZ256rri) NOCOMP(VPMAXSQZ256rm) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits