https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/83834
>From 9cec3b7b1ea0491df688555a51750efe6c9bd075 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Mon, 4 Mar 2024 18:09:41 +0800 Subject: [PATCH] [X86] Add missing subvector_subreg_lowering for BF16 (#83720) --- llvm/lib/Target/X86/X86InstrVecCompiler.td | 3 +++ .../CodeGen/X86/avx512bf16-vl-intrinsics.ll | 22 +++++++++++++++++++ llvm/test/CodeGen/X86/bfloat.ll | 1 - 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index bbd19cf8d5b25e..461b2badc13134 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>; +defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>; // A 128-bit subvector extract from the first 512-bit vector position is a // subregister copy that needs no instruction. Likewise, a 128-bit subvector @@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>; defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>; defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>; +defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>; // A 128-bit subvector extract from the first 512-bit vector position is a // subregister copy that needs no instruction. Likewise, a 128-bit subvector @@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>; defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>; defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>; +defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>; // If we're inserting into an all zeros vector, just use a plain move which diff --git a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll index 0826faa1071b01..482713e12d15c7 100644 --- a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll @@ -381,3 +381,25 @@ entry: %1 = shufflevector <8 x bfloat> %0, <8 x bfloat> undef, <16 x i32> zeroinitializer ret <16 x bfloat> %1 } + +define <16 x i32> @pr83358() { +; X86-LABEL: pr83358: +; X86: # %bb.0: +; X86-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4 +; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00] +; X86-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: pr83358: +; X64: # %bb.0: +; X64-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte +; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00] +; X64-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>) + %2 = bitcast <8 x bfloat> %1 to <4 x i32> + %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> + ret <16 x i32> %3 +} diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll index f2d3c4fb34199e..cd1dba17611628 100644 --- a/llvm/test/CodeGen/X86/bfloat.ll +++ b/llvm/test/CodeGen/X86/bfloat.ll @@ -2423,7 +2423,6 @@ define <16 x bfloat> @fptrunc_v16f32(<16 x float> %a) nounwind { ; AVXNC-LABEL: fptrunc_v16f32: ; AVXNC: # %bb.0: ; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 -; AVXNC-NEXT: vinsertf128 $0, %xmm0, %ymm0, %ymm0 ; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm1, %xmm1 ; AVXNC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVXNC-NEXT: retq _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits