https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/118492
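A minimal usage sketch of the new FP8 FDOT lane intrinsics (not part of the patch; adapted from the acle_sme2_fp8_fdot.c test below — the intrinsic name, argument order, and the [0, 7] lane-immediate range are taken from the arm_sme.td entries in this series, and the wrapper function name is illustrative). It assumes a compiler with +sme2 and +sme-f8f16; the za32 variants added in the later fixup additionally require +sme-f8f32 and take a lane index in [0, 3] instead of [0, 7].

#include <arm_sme.h>

// Accumulates FP8 dot products into ZA.H slices. The trailing fpm_t argument
// is written to FPMR first (lowered to llvm.aarch64.set.fpmr) before the
// fdot intrinsic call is emitted.
void fdot_za16_example(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm,
                       fpm_t fpmr) __arm_streaming __arm_inout("za") {
  // The lane index must be a compile-time immediate in [0, 7] for the za16 form.
  svdot_lane_za16_mf8_vg1x2_fpm(slice, zn, zm, 3, fpmr);
}
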
>From 609cf3fbdb28c155f4b8c787c1e2cb791c8a292f Mon Sep 17 00:00:00 2001 From: Jonathan Thackray <jonathan.thack...@arm.com> Date: Fri, 29 Nov 2024 11:27:03 +0000 Subject: [PATCH 1/4] [AArch64] Add intrinsics for SME FP8 FDOT LANE instructions Co-authored-by: Momchil Velikov <momchil.veli...@arm.com> Co-authored-by: Marian Lukac <marian.lu...@arm.com> Co-authored-by: Caroline Concatto <caroline.conca...@arm.com> --- clang/include/clang/Basic/arm_sme.td | 5 + clang/include/clang/Basic/arm_sve_sme_incl.td | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 6 + .../sme2-intrinsics/acle_sme2_fp8_fdot.c | 57 +++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 22 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 141 ++++++++++++++++++ .../AArch64/sme2-intrinsics-fp8-fdot.ll | 32 ++++ 8 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 0f689e82bdb742..87ed68c03430cd 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -740,6 +740,11 @@ let SMETargetGuard = "sme2" in { def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } +// FDOT +let SMETargetGuard = "sme2,sme-f8f16" in { + def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} //////////////////////////////////////////////////////////////////////////////// // SME2p1 - FMOPA, FMOPS (non-widening) let SMETargetGuard = "sme-b16b16" in { diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index de10be7bdce0db..e7cc40db7dca6c 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -52,6 +52,7 @@ include "arm_immcheck_incl.td" // h: half-float // d: double // b: bfloat +// m: mfloat8 // Typespec modifiers // ------------------ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7588f8427cdd38..7de5e8bcd439d7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10183,6 +10183,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { case SVETypeFlags::EltTyInt64: return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); + case SVETypeFlags::EltTyMFloat8: + return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); case SVETypeFlags::EltTyBFloat16: @@ -11234,6 +11236,10 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, BuiltinID == SME::BI__builtin_sme_svstr_za) return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); + // Emit set FPMR for intrinsics that require it + if (TypeFlags.setsFPMR()) + Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), + Ops.pop_back_val()); 
// Handle builtins which require their multi-vector operands to be swapped swapCommutativeSMEOperands(BuiltinID, Ops); diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c new file mode 100644 index 00000000000000..999b1940df80c4 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c @@ -0,0 +1,57 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: aarch64-registered-target +#include <arm_sme.h> + +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include <arm_sme.h> + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr); +} + + +// CHECK-LABEL: define dso_local void 
@test_svdot_lane_za16_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za16_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_lane_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a91616b9556828..c4ce8c1917c9b6 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3864,3 +3864,25 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; // Neon absolute maximum and minimum def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; + +// SME FDOT instructions +let TargetPrefix = "aarch64" in { + + +class SME2_FP8_FDOT_LANE_VG1x2 : + DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<4>>]>; + +class SME2_FP8_FDOT_LANE_VG1x4 : + DefaultAttrsIntrinsic<[], [llvm_i32_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<6>>]>; + + def int_aarch64_sme_fp8_fdot_lane_za16_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2; + def int_aarch64_sme_fp8_fdot_lane_za16_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4; +} diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 37ac915d1d8808..f02a4b7bdbfaa8 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -986,8 +986,8 @@ def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">; let Predicates = [HasSMEF8F16] in { defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>; -defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>; -defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_f8f16<"fdot", 
0b100, ZZZZ_b_mul_r, ZPR4b8>; +defm FDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fdot", 0b11, 0b010, ZZ_b_mul_r, int_aarch64_sme_fp8_fdot_lane_za16_vg1x2>; +defm FDOT_VG4_M4ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x4<"fdot", 0b100, ZZZZ_b_mul_r, int_aarch64_sme_fp8_fdot_lane_za16_vg1x4>; defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>; defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>; // TODO: Replace nxv16i8 by nxv16f8 diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 776472e72af05a..4b68df1b5ff29b 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -219,6 +219,37 @@ class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType ou : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))), (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>; + +// FP8 SME FDOT instructions + +// Selection DAG patterns - map to first level of pseudo-instructions (xxx_PSEUDO) +class SME2_FP8_FMLA_FDOT_Index_VG1x2_Pat<string name, SDPatternOperator intrinsic, + ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, + ValueType vt = nxv16i8> + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), + (!cast<Instruction>(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), + ZPR4b8:$Zm, imm_ty:$i)>; + +class SME2_FP8_FMLA_FDOT_Index_VG1x4_Pat<string name, SDPatternOperator intrinsic, + ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, + ValueType vt = nxv16i8> + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, + vt:$Zm, (i32 imm_ty:$i)), + (!cast<Instruction>(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), + ZPR4b8:$Zm, imm_ty:$i)>; + +class sme2_fp8_fmla_fdot_index_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty, Operand imm_ty> + : SMEPseudo2Instr<name, 0>, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i), []> { + let mayLoad = 1; + let SMEMatrixType = SMEMatrixArray; + let usesCustomInserter = 1; +} + //===----------------------------------------------------------------------===// // SME pattern match helpers. 
//===----------------------------------------------------------------------===// @@ -5737,3 +5768,113 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> { // Multiple vectors def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; } + +// FP8 SME FDOT instructions + +// Selection DAG patterns - map to first level of pseudo-instructions (xxx_PSEUDO) + +class SME2_FP8_FDOT_Index_VG1x2_Pat<string name, SDPatternOperator intrinsic, + ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, + ValueType vt = nxv16i8> + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i), i64:$fpmr), + (!cast<Instruction>(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), + ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>; + +class SME2_FP8_FDOT_Index_VG1x4_Pat<string name, SDPatternOperator intrinsic, + ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, + ValueType vt = nxv16i8> + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, + vt:$Zm, (i32 imm_ty:$i), i64:$fpmr), + (!cast<Instruction>(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), + ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>; + +// First level pseudo-instructions (xxx_PSEUDO) - transformed to second level pseudo-instructions (xxx_FPMR_PSEUDO) +// during instruction selection. +class sme2_fp8_fdot_index_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty, Operand imm_ty> + : SMEPseudo2Instr<name, 0>, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []> { + let SMEMatrixType = SMEMatrixArray; + let usesCustomInserter = 1; +} + +class sme2_fp8_fdot_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty> + : SMEPseudo2Instr<name, 0>, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []> { + let SMEMatrixType = SMEMatrixArray; + let usesCustomInserter = 1; +} + +// Second level pseudo-instruction - expanded to real instruction by the AArch64 pseudo instruction expansion pass +class sme2_fp8_fdot_index_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty, + RegisterOperand src1_ty, RegisterOperand src2_ty, + Operand imm_ty> + : Pseudo<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, + src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []>, + SMEPseudo2Instr<name, 1> { + let hasNoSchedulingInfo = 1; + let Constraints = "$ZAda = $_ZAda"; +} + +class sme2_fp8_fdot_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty, + RegisterOperand src1_ty, RegisterOperand src2_ty> + : Pseudo<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, + src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []>, + SMEPseudo2Instr<name, 1> { + let hasNoSchedulingInfo = 1; + let Constraints = "$ZAda = $_ZAda"; +} + +// FDOT instructions +multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<2> sz, bits<3> op, + RegisterOperand multi_vector_ty, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16, + multi_vector_ty, ZPR4b8, + VectorIndexH32b_timm, mnemonic>, + SMEPseudo2Instr<NAME, 1>{ + let Uses=[FPMR, FPCR]; + let 
mayLoad = 1; + + bits<3> i; + let Inst{11-10} = i{2-1}; + let Inst{3} = i{0}; + } + + def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", + (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexH32b_timm:$i), 0>; + + + def _PSEUDO : sme2_fp8_fmla_fdot_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, ZPR4b8, VectorIndexH32b_timm>; + + def : SME2_FP8_FMLA_FDOT_Index_VG1x2_Pat<NAME, intrinsic, tileslice16, sme_elm_idx0_7, VectorIndexH32b_timm>; +} + +multiclass sme2_fp8_fdot_index_za16_vg1x4<string mnemonic, bits<3> op, + RegisterOperand multi_vector_ty, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, + multi_vector_ty, ZPR4b8, + VectorIndexH32b_timm, mnemonic>, + SMEPseudo2Instr<NAME, 1> { + let Uses=[FPMR, FPCR]; + let mayLoad = 1; + + bits<3> i; + let Inst{11-10} = i{2-1}; + let Inst{3} = i{0}; + } + + def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", + (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, + sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexH32b_timm:$i), 0>; + + + def _PSEUDO : sme2_fp8_fmla_fdot_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, ZPR4b8, VectorIndexH32b_timm>; + + def : SME2_FP8_FMLA_FDOT_Index_VG1x4_Pat<NAME, intrinsic, tileslice16, sme_elm_idx0_7, VectorIndexH32b_timm>; +} diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll new file mode 100644 index 00000000000000..138d9876fabda8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "^[ \t]*//.*$" --version 4 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+sme-f8f16,+sme-f8f32 -verify-machineinstrs -force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @test_fdot16_1x2_indexed(i32 %slice.0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm) #0 { +; CHECK-LABEL: test_fdot16_1x2_indexed: +; CHECK: mov w8, w0 +; CHECK: fdot za.h[w8, 7, vgx2], { z0.b, z1.b }, z2.b[1] +; CHECK: ret + %slice = add i32 %slice.0, 7 + call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 %slice, + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, + <vscale x 16 x i8> %zm, i32 1) + ret void +} + +define void @test_fdot16_1x4_indexed(i32 %slice.0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, +; CHECK-LABEL: test_fdot16_1x4_indexed: +; CHECK: mov w8, w0 +; CHECK: fdot za.h[w8, 7, vgx4], { z0.b - z3.b }, z4.b[1] +; CHECK: ret + <vscale x 16 x i8> %zm) #0 { + %slice = add i32 %slice.0, 7 + call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x4(i32 %slice, + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, + <vscale x 16 x i8> %zm, i32 1) + ret void +} + + +attributes #0 = { "target-features" = "+sme,+sme-f8f32,+sme-f8f16" } >From 605ccdf31aba832b2056a1b89a073dfbdcd54af6 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray <jonathan.thack...@arm.com> Date: Tue, 3 Dec 2024 14:29:37 +0000 Subject: [PATCH 2/4] fixup! 
[AArch64] Add intrinsics for SME FP8 FDOT LANE instructions --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7de5e8bcd439d7..11cb8da78879e3 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10183,7 +10183,7 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { case SVETypeFlags::EltTyInt64: return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); - case SVETypeFlags::EltTyMFloat8: + case SVETypeFlags::EltTyMFloat8: return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); >From 863a80380023f3b375da7d358780d2919a8d58dc Mon Sep 17 00:00:00 2001 From: Jonathan Thackray <jonathan.thack...@arm.com> Date: Wed, 4 Dec 2024 21:40:35 +0000 Subject: [PATCH 3/4] fixup! [AArch64] Add intrinsics for SME FP8 FDOT LANE instructions --- clang/include/clang/Basic/arm_sme.td | 3 +- .../sme2-intrinsics/acle_sme2_fp8_fdot.c | 10 ++-- llvm/lib/Target/AArch64/SMEInstrFormats.td | 60 ------------------- 3 files changed, 7 insertions(+), 66 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 87ed68c03430cd..003da2eea5e1c9 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -741,10 +741,11 @@ let SMETargetGuard = "sme2" in { } // FDOT -let SMETargetGuard = "sme2,sme-f8f16" in { +let SMETargetGuard = "sme-f8f16" in { def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } + //////////////////////////////////////////////////////////////////////////////// // SME2p1 - FMOPA, FMOPS (non-widening) let SMETargetGuard = "sme-b16b16" in { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c index 999b1940df80c4..c4e4a57dd2caa3 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target #include <arm_sme.h> -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sme.h> #ifdef SVE_OVERLOADED_FORMS diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4b68df1b5ff29b..82d84f326ca608 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5771,66 +5771,6 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> { // FP8 SME FDOT instructions -// Selection DAG patterns - map to first level of pseudo-instructions (xxx_PSEUDO) - -class SME2_FP8_FDOT_Index_VG1x2_Pat<string name, SDPatternOperator intrinsic, - ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, - ValueType vt = nxv16i8> - : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), - vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i), i64:$fpmr), - (!cast<Instruction>(name # _PSEUDO) $base, $offset, - (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), - ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>; - -class SME2_FP8_FDOT_Index_VG1x4_Pat<string name, SDPatternOperator intrinsic, - ComplexPattern tileslice, Operand offset_ty, Operand imm_ty, - ValueType vt = nxv16i8> - : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)), - vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, - vt:$Zm, (i32 imm_ty:$i), i64:$fpmr), - (!cast<Instruction>(name # _PSEUDO) $base, $offset, - (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), - ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>; - -// First level pseudo-instructions (xxx_PSEUDO) - transformed to second level pseudo-instructions (xxx_FPMR_PSEUDO) -// during instruction selection. 
-class sme2_fp8_fdot_index_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty, Operand imm_ty> - : SMEPseudo2Instr<name, 0>, - Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []> { - let SMEMatrixType = SMEMatrixArray; - let usesCustomInserter = 1; -} - -class sme2_fp8_fdot_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty> - : SMEPseudo2Instr<name, 0>, - Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []> { - let SMEMatrixType = SMEMatrixArray; - let usesCustomInserter = 1; -} - -// Second level pseudo-instruction - expanded to real instruction by the AArch64 pseudo instruction expansion pass -class sme2_fp8_fdot_index_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty, - RegisterOperand src1_ty, RegisterOperand src2_ty, - Operand imm_ty> - : Pseudo<(outs matrix_ty:$ZAda), - (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, - src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []>, - SMEPseudo2Instr<name, 1> { - let hasNoSchedulingInfo = 1; - let Constraints = "$ZAda = $_ZAda"; -} - -class sme2_fp8_fdot_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty, - RegisterOperand src1_ty, RegisterOperand src2_ty> - : Pseudo<(outs matrix_ty:$ZAda), - (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, - src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []>, - SMEPseudo2Instr<name, 1> { - let hasNoSchedulingInfo = 1; - let Constraints = "$ZAda = $_ZAda"; -} - -// FDOT instructions multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<2> sz, bits<3> op, RegisterOperand multi_vector_ty, SDPatternOperator intrinsic> { def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16, >From 4dba844771b596923c632a69a2973df7f37c1dd8 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray <jonathan.thack...@arm.com> Date: Thu, 5 Dec 2024 09:24:14 +0000 Subject: [PATCH 4/4] fixup! 
[AArch64] Add intrinsics for SME FP8 FDOT LANE instructions --- clang/include/clang/Basic/arm_sme.td | 9 +++ .../sme2-intrinsics/acle_sme2_fp8_fdot.c | 73 +++++++++++++++++-- .../acle_sme2_fp8_fdot.c | 29 ++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 44 +++++++++++ .../AArch64/sme2-intrinsics-fp8-fdot.ll | 27 +++++++ 7 files changed, 179 insertions(+), 11 deletions(-) create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 003da2eea5e1c9..e760ce2575e377 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -740,7 +740,16 @@ let SMETargetGuard = "sme2" in { def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } +// +// SME2 FP8 instructions +// + // FDOT +let SMETargetGuard = "sme-f8f32" in { + def SVDOT_LANE_FP8_ZA32_VG1x2 : Inst<"svdot_lane_za32[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; + def SVDOT_LANE_FP8_ZA32_VG1x4 : Inst<"svdot_lane_za32[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; +} + let SMETargetGuard = "sme-f8f16" in { def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c index c4e4a57dd2caa3..56ca8c5fb7a2e4 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c @@ -2,11 +2,11 @@ // REQUIRES: aarch64-registered-target #include <arm_sme.h> -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature 
+sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sme.h> #ifdef SVE_OVERLOADED_FORMS @@ -15,15 +15,73 @@ #define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 #endif -// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2( +// +// CHECK-CXX-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x2j11svuint8x2_tu11__SVUint8_tm( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) local_unnamed_addr #[[> +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vs> +// CHECK-CXX-NEXT: ret void +// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3) +// 
CPP-CHECK-NEXT: ret void +// +void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, uint64_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr); +} + +// +// CHECK-CXX-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x4j11svuint8x4_tu11__SVUint8_tm( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 64 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) local_unnamed_addr #[[> +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 0) +// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16) +// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32) +// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vs> +// CHECK-CXX-NEXT: ret void +// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, + svmfloat8_t zm, uint64_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> 
[[ZM]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm( -// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3) @@ -35,7 +93,6 @@ void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr); } - // CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c new file mode 100644 index 00000000000000..c72e60bd8fcb1e --- /dev/null +++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -verify -emit-llvm -o - %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +void test_features(uint32_t slice, svmfloat8_t f8, svmfloat8x2_t f8x2, + svmfloat8x4_t f8x4, uint64_t fpmr) __arm_streaming __arm_inout("za") { + // expected-error@+1 {{'svdot_lane_za32_mf8_vg1x2_fpm' needs target feature sme,sme-f8f32}} + svdot_lane_za32_mf8_vg1x2_fpm(slice, f8x2, f8, 3, fpmr); + // expected-error@+1 {{'svdot_lane_za32_mf8_vg1x4_fpm' needs target feature sme,sme-f8f32}} + svdot_lane_za32_mf8_vg1x4_fpm(slice, f8x4, f8, 3, fpmr); + // expected-error@+1 {{'svdot_lane_za16_mf8_vg1x2_fpm' needs target feature sme,sme-f8f16}} + svdot_lane_za16_mf8_vg1x2_fpm(slice, f8x2, f8, 3, fpmr); + // expected-error@+1 {{'svdot_lane_za16_mf8_vg1x4_fpm' needs target feature sme,sme-f8f16}} + svdot_lane_za16_mf8_vg1x4_fpm(slice, f8x4, f8, 3, fpmr); +} + +void test_imm(uint32_t slice, svmfloat8_t f8, svmfloat8x2_t f8x2, + svmfloat8x4_t f8x4, uint64_t fpmr) __arm_streaming __arm_inout("za") { +// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svdot_lane_za32_mf8_vg1x2_fpm(slice, f8x2, f8, -1, fpmr); +// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svdot_lane_za32_mf8_vg1x4_fpm(slice, f8x4, f8, -1, fpmr); +// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdot_lane_za16_mf8_vg1x2_fpm(slice, f8x2, f8, -1, fpmr); +// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 7]}} + svdot_lane_za16_mf8_vg1x4_fpm(slice, f8x4, f8, -1, fpmr); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index c4ce8c1917c9b6..409661c2602470 100644 --- 
a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3868,7 +3868,6 @@ def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; // SME FDOT instructions let TargetPrefix = "aarch64" in { - class SME2_FP8_FDOT_LANE_VG1x2 : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, @@ -3885,4 +3884,7 @@ class SME2_FP8_FDOT_LANE_VG1x4 : def int_aarch64_sme_fp8_fdot_lane_za16_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2; def int_aarch64_sme_fp8_fdot_lane_za16_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4; + + def int_aarch64_sme_fp8_fdot_lane_za32_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2; + def int_aarch64_sme_fp8_fdot_lane_za32_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4; } diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index f02a4b7bdbfaa8..e68b924d581c78 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1010,8 +1010,8 @@ defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_f8f16<"fmopa", 0b1, 0b0, 0b01>; let Predicates = [HasSMEF8F32] in { // TODO : Replace nxv16i8 by nxv16f8 -defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>; -defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>; +defm FDOT_VG2_M2ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x2<"fdot", 0b01, 0b0111, ZZ_b_mul_r, int_aarch64_sme_fp8_fdot_lane_za32_vg1x2>; +defm FDOT_VG4_M4ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x4<"fdot", 0b0001, ZZZZ_b_mul_r, int_aarch64_sme_fp8_fdot_lane_za32_vg1x4>; defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>; defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>; // TODO : Replace nxv16i8 by nxv16f8 diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 82d84f326ca608..e531144e9d5975 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5818,3 +5818,47 @@ multiclass sme2_fp8_fdot_index_za16_vg1x4<string mnemonic, bits<3> op, def : SME2_FP8_FMLA_FDOT_Index_VG1x4_Pat<NAME, intrinsic, tileslice16, sme_elm_idx0_7, VectorIndexH32b_timm>; } + +multiclass sme2_fp8_fdot_index_za32_vg1x2<string mnemonic, bits<2> sz, bits<4> op, + RegisterOperand multi_vector_ty, + SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, ZPR4b8, + VectorIndexS32b_timm, mnemonic>, + SMEPseudo2Instr<NAME, 1> { + let Uses=[FPMR, FPCR]; + let mayLoad = 1; + + bits<2> i; + let Inst{11-10} = i; + } + + def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", + (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i), 0>; + + def _PSEUDO : sme2_fp8_fmla_fdot_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, ZPR4b8, VectorIndexS32b_timm>; + + def : SME2_FP8_FMLA_FDOT_Index_VG1x2_Pat<NAME, intrinsic, tileslice16, sme_elm_idx0_7, VectorIndexS32b_timm>; +} + +multiclass sme2_fp8_fdot_index_za32_vg1x4<string mnemonic, bits<4> op, + RegisterOperand multi_vector_ty, + SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, + ZPR4b8, VectorIndexS32b_timm, mnemonic>, + SMEPseudo2Instr<NAME, 1> { + let 
Uses=[FPMR, FPCR]; let mayLoad = 1; + + bits<2> i; + let Inst{11-10} = i; + } + + def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", + (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i), 0>; + + def _PSEUDO : sme2_fp8_fmla_fdot_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, ZPR4b8, VectorIndexS32b_timm>; + + def : SME2_FP8_FMLA_FDOT_Index_VG1x4_Pat<NAME, intrinsic, tileslice16, sme_elm_idx0_7, VectorIndexS32b_timm>; +} diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll index 138d9876fabda8..7fcbc328aa085e 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll @@ -28,5 +28,32 @@ define void @test_fdot16_1x4_indexed(i32 %slice.0, <vscale x 16 x i8> %zn1, <vsc ret void } +define void @test_fdot32_1x2_indexed(i32 %slice.0, +; CHECK-LABEL: test_fdot32_1x2_indexed: +; CHECK: mov w8, w0 +; CHECK: fdot za.s[w8, 7, vgx2], { z0.b, z1.b }, z2.b[1] +; CHECK: ret + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, + <vscale x 16 x i8> %zm) #0 { + %slice = add i32 %slice.0, 7 + call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 %slice, + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, + <vscale x 16 x i8> %zm, i32 1) + ret void +} + +define void @test_fdot32_1x4_indexed(i32 %slice.0, +; CHECK-LABEL: test_fdot32_1x4_indexed: +; CHECK: mov w8, w0 +; CHECK: fdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[1] +; CHECK: ret + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, + <vscale x 16 x i8> %zm) #0 { + %slice = add i32 %slice.0, 7 + call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 %slice, + <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, + <vscale x 16 x i8> %zm, i32 1) + ret void +} attributes #0 = { "target-features" = "+sme,+sme-f8f32,+sme-f8f16" }