llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (Lukacma) <details> <summary>Changes</summary> This patch adds fp8 variants to the following untyped SME intrinsics based on [ACLE](https://github.com/ARM-software/acle/blob/main/main/acle.md): ``` svint8_t svread_hor_za8[_s8]_m(svint8_t zd, svbool_t pg, uint64_t tile, uint32_t slice) svint8_t svread_hor_za128[_s8]_m(svint8_t zd, svbool_t pg, uint64_t tile, uint32_t slice) void svwrite_hor_za8[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg, svint8_t zn) void svwrite_hor_za128[_s8]_m(uint64_t tile, uint32_t slice, svbool_t pg, svint8_t zn) svint8_t svluti2_lane_zt_s8(uint64_t zt, svuint8_t zn, uint64_t imm_idx) svint8x2_t svluti2_lane_zt_s8_x2(uint64_t zt, svuint8_t zn, svint8x4_t svluti2_lane_zt_s8_x4(uint64_t zt, svuint8_t zn, svint8_t svluti4_lane_zt_s8(uint64_t zt, svuint8_t zn, uint64_t imm_idx) svint16x4_t svluti4_lane_zt_s16_x4(uint64_t zt, svuint8_t zn, svint8x2_t svread_hor_za8_s8_vg2(uint64_t tile, uint32_t slice) svint8x4_t svread_hor_za8_s8_vg4(uint64_t tile, uint32_t slice) svint8x2_t svread_ver_za8_s8_vg2(uint64_t tile, uint32_t slice) svint8x4_t svread_ver_za8_s8_vg4(uint64_t tile, uint32_t slice) svint8x2_t svread_za8_s8_vg1x2(uint32_t slice) svint8x4_t svread_za8_s8_vg1x4(uint32_t slice) void svwrite_hor_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn) void svwrite_hor_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn) void svwrite_ver_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn) void svwrite_ver_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn) void svwrite_za8[_s8]_vg1x2(uint32_t slice, svint8x2_t zn) void svwrite_za8[_s8]_vg1x4(uint32_t slice, svint8x4_t zn) svuint8x2_t svsel[_u8_x2](svcount_t png, svuint8x2_t zn, svuint8x2_t zm) svuint8x4_t svsel[_u8_x4](svcount_t png, svuint8x4_t zn, svuint8x4_t zm) svint8x2_t svzip[_s8_x2](svint8x2_t zn) __arm_streaming; svint8x4_t svzip[_s8_x4](svint8x4_t zn) __arm_streaming; svint8x2_t svzipq[_s8_x2](svint8x2_t zn) __arm_streaming; svint8x4_t svzipq[_s8_x4](svint8x4_t zn) __arm_streaming; svint8x2_t svuzp[_s8_x2](svint8x2_t zn) __arm_streaming; svint8x4_t svuzp[_s8_x4](svint8x4_t zn) __arm_streaming; svint8x2_t svuzpq[_s8_x2](svint8x2_t zn) __arm_streaming; svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice) svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice) svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice) svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice) svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice) svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice) svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice) svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice) svuint8_t svrevd[_u8]_m(svuint8_t zd, svbool_t pg, svuint8_t zn); svuint8_t svrevd[_u8]_z(svbool_t pg, svuint8_t zn); svuint8_t svrevd[_u8]_x(svbool_t pg, svuint8_t zn); ``` --- Patch is 120.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124543.diff 19 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+16-16) - (modified) clang/include/clang/Basic/arm_sve.td (+12-12) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c (+134) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_write.c (+135) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt.c (+13) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c (+14) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c (+14) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt.c (+14) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c (+14) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_read.c (+83-1) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx2.c (+13) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_selx4.c (+57-43) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx2.c (+28) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_uzpx4.c (+28) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx2.c (+28) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_vector_zipx4.c (+28) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_write.c (+74-4) - (modified) clang/test/CodeGen/AArch64/sme2p1-intrinsics/acle_sme2p1_movaz.c (+136) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_revd.c (+42) ``````````diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 891ed9874bb3d0..51cab572ce357d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -110,11 +110,11 @@ multiclass ZARead<string n_suffix, string t, string i_prefix, list<ImmCheck> ch> } } -defm SVREAD_ZA8 : ZARead<"za8", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>; +defm SVREAD_ZA8 : ZARead<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_0>]>; defm SVREAD_ZA16 : ZARead<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>]>; defm SVREAD_ZA32 : ZARead<"za32", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>]>; defm SVREAD_ZA64 : ZARead<"za64", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>]>; -defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>; +defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // Write horizontal/vertical ZA slices @@ -131,11 +131,11 @@ multiclass ZAWrite<string n_suffix, string t, string i_prefix, list<ImmCheck> ch } } -defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; +defm SVWRITE_ZA8 : ZAWrite<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; -defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>; +defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlmhbfd", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>]>; //////////////////////////////////////////////////////////////////////////////// // SME - Zero @@ -350,7 +350,7 @@ multiclass ZAWrite_VG<string n, string t, string i, list<ImmCheck> checks> { } let SMETargetGuard = "sme2" in { - defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; + defm SVWRITE_ZA8 : ZAWrite_VG<"za8", "cUcm", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_0>]>; defm SVWRITE_ZA16 : ZAWrite_VG<"za16", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>]>; defm SVWRITE_ZA32 : ZAWrite_VG<"za32", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>]>; defm SVWRITE_ZA64 : ZAWrite_VG<"za64", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>]>; @@ -366,7 +366,7 @@ multiclass ZARead_VG<string n, string t, string i, list<ImmCheck> checks> { } let SMETargetGuard = "sme2" in { - defm SVREAD_ZA8 : ZARead_VG<"za8", "cUc", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; + defm SVREAD_ZA8 : ZARead_VG<"za8", "cUcm", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_0>]>; defm SVREAD_ZA16 : ZARead_VG<"za16", "sUshb", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_1>]>; defm SVREAD_ZA32 : ZARead_VG<"za32", "iUif", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_3>]>; defm SVREAD_ZA64 : ZARead_VG<"za64", "lUld", "aarch64_sme_read", [ImmCheck<0, ImmCheck0_7>]>; @@ -722,7 +722,7 @@ def IN_STREAMING_MODE : Inst<"__arm_in_streaming_mode", "sv", "Pc", MergeNone, // lookup table expand four contiguous registers // let SMETargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>; } @@ -730,16 +730,16 @@ let SMETargetGuard = "sme2" in { // lookup table expand one register // let SMETargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; - def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; + def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; } // // lookup table expand two contiguous registers // let SMETargetGuard = "sme2" in { - def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; - def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; + def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUimbhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } // @@ -811,12 +811,12 @@ multiclass ZAReadz<string n_suffix, string vg_num, string t, string i_prefix, li } } -defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREADZ_ZA8_X2 : ZAReadz<"za8", "2", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; defm SVREADZ_ZA16_X2 : ZAReadz<"za16", "2", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>; defm SVREADZ_ZA32_X2 : ZAReadz<"za32", "2", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>; defm SVREADZ_ZA64_X2 : ZAReadz<"za64", "2", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>; -defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREADZ_ZA8_X4 : ZAReadz<"za8", "4", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; defm SVREADZ_ZA16_X4 : ZAReadz<"za16", "4", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>; defm SVREADZ_ZA32_X4 : ZAReadz<"za32", "4", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>; defm SVREADZ_ZA64_X4 : ZAReadz<"za64", "4", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>; @@ -834,15 +834,15 @@ multiclass ZAReadzSingle<string n_suffix, string t, string i_prefix, list<ImmChe } } -defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUc", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; +defm SVREADZ_ZA8 : ZAReadzSingle<"za8", "cUcm", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_0>]>; defm SVREADZ_ZA16 : ZAReadzSingle<"za16", "sUshb", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_1>]>; defm SVREADZ_ZA32 : ZAReadzSingle<"za32", "iUif", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_3>]>; defm SVREADZ_ZA64 : ZAReadzSingle<"za64", "lUld", "aarch64_sme_readz", [ImmCheck<0, ImmCheck0_7>]>; -defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>; +defm SVREADZ_ZA128 : ZAReadzSingle<"za128", "csilUcUiUsUlmbhfd", "aarch64_sme_readz_q", [ImmCheck<0, ImmCheck0_15>]>; multiclass ZAReadzArray<string vg_num>{ let SMETargetGuard = "sme2p1" in { - def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>; + def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUcm", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>; def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>; def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>; def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index e7001bac450e89..12cca992a21ab7 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2104,7 +2104,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme" in { def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp", [VerifyRuntimeMode], []>; def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [VerifyRuntimeMode], []>; -defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">; +defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlmbhfd", "aarch64_sve_revd">; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { @@ -2223,8 +2223,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; // 2-way and 4-way selects - def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>; - def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>; + def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>; + def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>; // SRSHL / URSHL def SVSRSHL_SINGLE_X2 : SInst<"svrshl[_single_{d}_x2]", "22d", "csil", MergeNone, "aarch64_sve_srshl_single_x2", [IsStreaming], []>; @@ -2402,15 +2402,15 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { // let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>; - def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>; - def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>; - def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>; - - def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>; - def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>; - def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>; - def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>; + def SVZIP_X2 : SInst<"svzip[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x2", [IsStreaming], []>; + def SVZIPQ_X2 : SInst<"svzipq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x2", [IsStreaming], []>; + def SVZIP_X4 : SInst<"svzip[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zip_x4", [IsStreaming], []>; + def SVZIPQ_X4 : SInst<"svzipq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_zipq_x4", [IsStreaming], []>; + + def SVUZP_X2 : SInst<"svuzp[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x2", [IsStreaming], []>; + def SVUZPQ_X2 : SInst<"svuzpq[_{d}_x2]", "22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x2", [IsStreaming], []>; + def SVUZP_X4 : SInst<"svuzp[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzp_x4", [IsStreaming], []>; + def SVUZPQ_X4 : SInst<"svuzpq[_{d}_x4]", "44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_uzpq_x4", [IsStreaming], []>; } // diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c index 508fad09ea715b..0605f9eef036fd 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_read.c @@ -318,6 +318,41 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice); } +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_hor_za8_mf8u13__SVMfloat8_tu10__SVBool_tj( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svread_hor_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice_base); +} + +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za8_mf8_1( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]]) +// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za8_mf8_1u13__SVMfloat8_tu10__SVBool_tj( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svread_hor_za8_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + uint32_t slice = slice_base + 15; + return SME_ACLE_FUNC(svread_hor_za8, _mf8, _m)(zd, pg, 0, slice); +} + // CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za16_f16( // CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: @@ -754,6 +789,38 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base); } +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z25test_svread_hor_za128_mf8u13__SVMfloat8_tu10__SVBool_tj( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svread_hor_za128_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 0, slice_base); +} + +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_hor_za128_mf8_1( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z27test_svread_hor_za128_mf8_1u13__SVMfloat8_tu10__SVBool_tj( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svread_hor_za128_mf8_1(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_hor_za128, _mf8, _m)(zd, pg, 15, slice_base); +} + // CHECK-C-LABEL: define dso_local <vscale x 8 x half> @test_svread_hor_za128_f16( // CHECK-C-SAME: <vscale x 8 x half> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: @@ -1202,6 +1269,41 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice); } +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-C-NEXT: entry: +// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z23test_svread_ver_za8_mf8u13__SVMfloat8_tu10__SVBool_tj( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i1> [[PG:%.*]], i32 noundef [[SLICE_BASE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-NEXT: entry: +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svread_ver_za8_mf8(svmfloat8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming __arm_in("za") { + return SME_ACLE_FUNC(svread_ver_za8, _mf8, _m)(zd, pg, 0, slice_base); +} + +// CHECK-C-LABEL: define dso_local <vscale x 16 x i8> @test_svread_ver_za8_mf8_1( +// CHECK-C-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 16 x i... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/124543 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits