This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG661881d43633: [X86] Add AMX-FP16 instructions. (authored by xiangzhangllvm). Herald added a project: clang. Herald added a subscriber: cfe-commits.
Changed prior to commit: https://reviews.llvm.org/D135941?vs=469499&id=469828#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D135941/new/ https://reviews.llvm.org/D135941 Files: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/BuiltinsX86_64.def clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/Headers/CMakeLists.txt clang/lib/Headers/amxfp16intrin.h clang/lib/Headers/cpuid.h clang/lib/Headers/immintrin.h clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/X86/amx_errors.c clang/test/CodeGen/amx_fp16.c clang/test/Driver/x86-target-features.c clang/test/Preprocessor/x86_target_features.c llvm/docs/ReleaseNotes.rst llvm/include/llvm/IR/IntrinsicsX86.td llvm/include/llvm/Support/X86TargetParser.def llvm/lib/Support/Host.cpp llvm/lib/Support/X86TargetParser.cpp llvm/lib/Target/X86/X86.td llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86InstrAMX.td llvm/lib/Target/X86/X86InstrInfo.td llvm/test/CodeGen/X86/amx_fp16_intrinsics.ll llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-fp16.txt llvm/test/MC/X86/AMX/x86-64-amx-fp16-att.s llvm/test/MC/X86/AMX/x86-64-amx-fp16-intel.s
Index: llvm/test/MC/X86/AMX/x86-64-amx-fp16-intel.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/AMX/x86-64-amx-fp16-intel.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: tdpfp16ps tmm3, tmm4, tmm5 +// CHECK: encoding: [0xc4,0xe2,0x53,0x5c,0xdc] + tdpfp16ps tmm3, tmm4, tmm5 Index: llvm/test/MC/X86/AMX/x86-64-amx-fp16-att.s =================================================================== --- /dev/null +++ llvm/test/MC/X86/AMX/x86-64-amx-fp16-att.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: tdpfp16ps %tmm5, %tmm4, %tmm3 +// CHECK: encoding: [0xc4,0xe2,0x53,0x5c,0xdc] + tdpfp16ps %tmm5, %tmm4, %tmm3 Index: llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-fp16.txt =================================================================== --- /dev/null +++ llvm/test/MC/Disassembler/X86/AMX/x86-64-amx-tile-fp16.txt @@ -0,0 +1,6 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck -check-prefix=ATT %s +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck -check-prefix=INTEL %s + +# ATT: tdpfp16ps %tmm5, %tmm4, %tmm3 +# INTEL: tdpfp16ps tmm3, tmm4, tmm5 +0xc4,0xe2,0x53,0x5c,0xdc Index: llvm/test/CodeGen/X86/amx_fp16_intrinsics.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/amx_fp16_intrinsics.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-fp16 | FileCheck %s + +; CHECK-LABEL: test_amx: +; CHECK: # %bb.0: +; CHECK: tdpfp16ps %tmm1, %tmm2, %tmm3 + +define void @test_amx() { +call void @llvm.x86.tdpfp16ps(i8 3, i8 2, i8 1) + +ret void +} +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +declare void @llvm.x86.tdpfp16ps(i8 %tile3, i8 %tile2, i8 %tile1) Index: 
llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -980,6 +980,7 @@ def HasCX16 : Predicate<"Subtarget->hasCX16()">; def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; +def HasAMXFP16 : Predicate<"Subtarget->hasAMXFP16()">; def HasKL : Predicate<"Subtarget->hasKL()">; def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">; def HasHRESET : Predicate<"Subtarget->hasHRESET()">; Index: llvm/lib/Target/X86/X86InstrAMX.td =================================================================== --- llvm/lib/Target/X86/X86InstrAMX.td +++ llvm/lib/Target/X86/X86InstrAMX.td @@ -185,3 +185,21 @@ } } } // HasAMXTILE, HasAMXBF16 + +//AMX-FP16 +let Predicates = [HasAMXFP16, In64BitMode] in { + let SchedRW = [WriteSystem] in { + let Constraints = "$src1 = $dst" in { + def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst), + (ins TILE:$src1, TILE:$src2, TILE:$src3), + "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, VEX_4V, T8XD; + } + let usesCustomInserter = 1 in { + def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, + u8imm:$src2, u8imm:$src3), + [(int_x86_tdpfp16ps timm:$src1, + timm:$src2, timm:$src3)]>; + } + } +} // HasAMXTILE, HasAMXFP16 Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36925,7 +36925,8 @@ case X86::PTDPBSUD: case X86::PTDPBUSD: case X86::PTDPBUUD: - case X86::PTDPBF16PS: { + case X86::PTDPBF16PS: + case X86::PTDPFP16PS: { unsigned Opc; switch (MI.getOpcode()) { default: llvm_unreachable("illegal opcode!"); @@ -36934,6 +36935,7 @@ case X86::PTDPBUSD: Opc = X86::TDPBUSD; break; case X86::PTDPBUUD: Opc = X86::TDPBUUD; break; case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break; + case X86::PTDPFP16PS: Opc = 
X86::TDPFP16PS; break; } MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -257,6 +257,10 @@ def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true", "Support AMX-BF16 instructions", [FeatureAMXTILE]>; +def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true", + "Support AMX amx-fp16 instructions", + [FeatureAMXTILE]>; + def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true", "Invalidate Process-Context Identifier">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", Index: llvm/lib/Support/X86TargetParser.cpp =================================================================== --- llvm/lib/Support/X86TargetParser.cpp +++ llvm/lib/Support/X86TargetParser.cpp @@ -578,6 +578,7 @@ // AMX Features constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {}; constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE; +constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; Index: llvm/lib/Support/Host.cpp =================================================================== --- llvm/lib/Support/Host.cpp +++ llvm/lib/Support/Host.cpp @@ -1807,6 +1807,7 @@ MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; + Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave; Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1); Index: llvm/include/llvm/Support/X86TargetParser.def =================================================================== --- 
llvm/include/llvm/Support/X86TargetParser.def +++ llvm/include/llvm/Support/X86TargetParser.def @@ -202,6 +202,7 @@ X86_FEATURE (XSAVES, "xsaves") X86_FEATURE (HRESET, "hreset") X86_FEATURE (AVX512FP16, "avx512fp16") +X86_FEATURE (AMX_FP16, "amx-fp16") X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") Index: llvm/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsX86.td +++ llvm/include/llvm/IR/IntrinsicsX86.td @@ -5115,6 +5115,14 @@ Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// +let TargetPrefix = "x86" in { +// AMX_FP16 - Intel FP16 AMX extensions + def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, + ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; +} //===----------------------------------------------------------------------===// // UINTR - User Level Interrupt Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -138,6 +138,7 @@ * Add support for the ``RDMSRLIST and WRMSRLIST`` instructions. * Add support for the ``WRMSRNS`` instruction. +* Support ISA of ``AMX-FP16``, which contains the ``tdpfp16ps`` instruction. 
Changes to the OCaml bindings ----------------------------- Index: clang/test/Preprocessor/x86_target_features.c =================================================================== --- clang/test/Preprocessor/x86_target_features.c +++ clang/test/Preprocessor/x86_target_features.c @@ -545,6 +545,20 @@ // NOUINTR-NOT: #define __UINTR__ 1 +// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mamx-fp16 -x c \ +// RUN: -E -dM -o - %s | FileCheck -check-prefix=AMX-FP16 %s + +// AMX-FP16: #define __AMXFP16__ 1 +// AMX-FP16: #define __AMXTILE__ 1 + +// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mno-amx-fp16 \ +// RUN: -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-FP16 %s +// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mamx-fp16 \ +// RUN: -mno-amx-tile -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-FP16 %s + +// NO-AMX-FP16-NOT: #define __AMXFP16__ 1 +// NO-AMX-FP16-NOT: #define __AMXTILE__ 1 + // RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s // AVXVNNI: #define __AVX2__ 1 Index: clang/test/Driver/x86-target-features.c =================================================================== --- clang/test/Driver/x86-target-features.c +++ clang/test/Driver/x86-target-features.c @@ -290,6 +290,13 @@ // AMX-INT8: "-target-feature" "+amx-int8" // NO-AMX-INT8: "-target-feature" "-amx-int8" +// RUN: %clang --target=x86_64 -mamx-fp16 %s \ +// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-FP16 %s +// RUN: %clang --target=x86_64 -mno-amx-fp16 \ +// RUN: %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-FP16 %s +// AMX-FP16: "-target-feature" "+amx-fp16" +// NO-AMX-FP16: "-target-feature" "-amx-fp16" + // RUN: %clang --target=i386 -march=i386 -mhreset %s -### 2>&1 | FileCheck -check-prefix=HRESET %s // RUN: %clang --target=i386 -march=i386 -mno-hreset %s -### 2>&1 | FileCheck -check-prefix=NO-HRESET %s // HRESET: "-target-feature" 
"+hreset" Index: clang/test/CodeGen/amx_fp16.c =================================================================== --- /dev/null +++ clang/test/CodeGen/amx_fp16.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \ +// RUN: -target-feature +amx-tile -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Wall -Werror -pedantic \ +// RUN: -Wno-gnu-statement-expression| FileCheck %s + +#include <immintrin.h> +#include <stddef.h> +void test_tile_dpfp16ps(void) { + // CHECK-LABEL: @test_tile_dpfp16ps + // CHECK: call void @llvm.x86.tdpfp16ps(i8 1, i8 2, i8 3) + _tile_dpfp16ps(1, 2, 3); +} Index: clang/test/CodeGen/X86/amx_errors.c =================================================================== --- clang/test/CodeGen/X86/amx_errors.c +++ clang/test/CodeGen/X86/amx_errors.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-int8 -target-feature +amx-bf16 -emit-llvm -fsyntax-only -verify +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile \ +// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -fsyntax-only -verify #include <immintrin.h> @@ -14,4 +15,7 @@ _tile_dpbsud(7, 1, 7); // expected-error {{tile arguments must refer to different tiles}} _tile_dpbsud(4, 3, 3); // expected-error {{tile arguments must refer to different tiles}} _tile_dpbf16ps(4, 3, 3); // expected-error {{tile arguments must refer to different tiles}} + _tile_dpfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}} + _tile_dpfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}} + _tile_dpfp16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}} } Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- 
clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -5026,6 +5026,7 @@ case X86::BI__builtin_ia32_tdpbusd: case X86::BI__builtin_ia32_tdpbuud: case X86::BI__builtin_ia32_tdpbf16ps: + case X86::BI__builtin_ia32_tdpfp16ps: return CheckX86BuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2}); } } Index: clang/lib/Headers/immintrin.h =================================================================== --- clang/lib/Headers/immintrin.h +++ clang/lib/Headers/immintrin.h @@ -508,6 +508,10 @@ defined(__INVPCID__) #include <invpcidintrin.h> #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AMXFP16__) +#include <amxfp16intrin.h> +#endif #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__KL__) || defined(__WIDEKL__) Index: clang/lib/Headers/cpuid.h =================================================================== --- clang/lib/Headers/cpuid.h +++ clang/lib/Headers/cpuid.h @@ -202,6 +202,7 @@ /* Features in %eax for leaf 7 sub-leaf 1 */ #define bit_AVXVNNI 0x00000010 #define bit_AVX512BF16 0x00000020 +#define bit_AMXFP16 0x00200000 #define bit_HRESET 0x00400000 /* Features in %edx for leaf 7 sub-leaf 1 */ Index: clang/lib/Headers/amxfp16intrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/amxfp16intrin.h @@ -0,0 +1,58 @@ +/*===------------- amxfp16intrin.h - AMX_FP16 intrinsics -*- C++ -*---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <amxfp16intrin.h> directly; use <immintrin.h> instead." 
+#endif /* __IMMINTRIN_H */ + +#ifndef __AMX_FP16INTRIN_H +#define __AMX_FP16INTRIN_H +#ifdef __x86_64__ + +/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a +/// and \a b, accumulating the intermediate single-precision (32-bit) +/// floating-point elements with elements in \a dst, and store the 32-bit +/// result back to tile \a dst. +/// +/// \headerfile <x86intrin.h> +/// +/// \code +/// void _tile_dpfp16ps (__tile dst, __tile a, __tile b) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// tmp := dst.row[m] +/// FOR k := 0 TO (a.colsb / 4) - 1 +/// FOR n := 0 TO (dst.colsb / 4) - 1 +/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * +/// FP32(b.row[k].fp16[2*n+0]) +/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * +/// FP32(b.row[k].fp16[2*n+1]) +/// ENDFOR +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// ENDFOR +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. 
+#define _tile_dpfp16ps(dst, a, b) \ + __builtin_ia32_tdpfp16ps(dst, a, b) + +#endif /* __x86_64__ */ +#endif /* __AMX_FP16INTRIN_H */ Index: clang/lib/Headers/CMakeLists.txt =================================================================== --- clang/lib/Headers/CMakeLists.txt +++ clang/lib/Headers/CMakeLists.txt @@ -111,6 +111,7 @@ # Intrinsics adxintrin.h ammintrin.h + amxfp16intrin.h amxintrin.h avx2intrin.h avx512bf16intrin.h Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -137,6 +137,7 @@ bool HasPTWRITE = false; bool HasINVPCID = false; bool HasENQCMD = false; + bool HasAMXFP16 = false; bool HasKL = false; // For key locker bool HasWIDEKL = false; // For wide key locker bool HasHRESET = false; Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -328,6 +328,8 @@ HasHRESET = true; } else if (Feature == "+amx-bf16") { HasAMXBF16 = true; + } else if (Feature == "+amx-fp16") { + HasAMXFP16 = true; } else if (Feature == "+amx-int8") { HasAMXINT8 = true; } else if (Feature == "+amx-tile") { @@ -778,6 +780,8 @@ Builder.defineMacro("__AMXINT8__"); if (HasAMXBF16) Builder.defineMacro("__AMXBF16__"); + if (HasAMXFP16) + Builder.defineMacro("__AMXFP16__"); if (HasAVXVNNI) Builder.defineMacro("__AVXVNNI__"); if (HasSERIALIZE) @@ -881,6 +885,7 @@ .Case("adx", true) .Case("aes", true) .Case("amx-bf16", true) + .Case("amx-fp16", true) .Case("amx-int8", true) .Case("amx-tile", true) .Case("avx", true) @@ -976,6 +981,7 @@ .Case("adx", HasADX) .Case("aes", HasAES) .Case("amx-bf16", HasAMXBF16) + .Case("amx-fp16", HasAMXFP16) .Case("amx-int8", HasAMXINT8) .Case("amx-tile", HasAMXTILE) .Case("avxvnni", HasAVXVNNI) Index: clang/include/clang/Driver/Options.td =================================================================== 
--- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -4522,6 +4522,8 @@ def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>; def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>; def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>; +def mamx_fp16 : Flag<["-"], "mamx-fp16">, Group<m_x86_Features_Group>; +def mno_amx_fp16 : Flag<["-"], "mno-amx-fp16">, Group<m_x86_Features_Group>; def mamx_int8 : Flag<["-"], "mamx-int8">, Group<m_x86_Features_Group>; def mno_amx_int8 : Flag<["-"], "mno-amx-int8">, Group<m_x86_Features_Group>; def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>; Index: clang/include/clang/Basic/BuiltinsX86_64.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86_64.def +++ clang/include/clang/Basic/BuiltinsX86_64.def @@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi") +// AMX_FP16 FP16 +TARGET_BUILTIN(__builtin_ia32_tdpfp16ps, "vIUcIUcIUc", "n", "amx-fp16") + #undef BUILTIN #undef TARGET_BUILTIN #undef TARGET_HEADER_BUILTIN Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -595,6 +595,7 @@ -------------------- - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk. - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC. +- Add ISA of ``AMX-FP16`` which supports ``_tile_dpfp16ps``. - Switch ``AVX512-BF16`` intrinsics types from ``short`` to ``__bf16``. - Add support for ``PREFETCHI`` instructions.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits