https://github.com/amilendra updated https://github.com/llvm/llvm-project/pull/145383
>From 2941cdc8c447dc841d7021719587d187cf3219f9 Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwa...@arm.com>
Date: Mon, 23 Jun 2025 19:16:28 +0100
Subject: [PATCH 1/2] [CLANG][AArch64] Add mfloat8_t support for more SVE load intrinsics

Add mfloat8_t support for the following SVE load intrinsics.

- SVLD1RO
- SVLD1RQ
- SVLDFF1
- SVLDFF1_VNUM
- SVLDNF1
- SVLDNF1_VNUM
---
 clang/include/clang/Basic/arm_sve.td          | 12 +++----
 .../AArch64/sve-intrinsics/acle_sve_ld1ro.c   | 14 ++++++++
 .../AArch64/sve-intrinsics/acle_sve_ld1rq.c   | 15 +++++++++
 .../AArch64/sve-intrinsics/acle_sve_ldff1.c   | 32 +++++++++++++++++++
 .../AArch64/sve-intrinsics/acle_sve_ldnf1.c   | 32 +++++++++++++++++++
 5 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 1b3131be78452..76fd072a41d8b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -121,7 +121,7 @@ def SVLD1UW_GATHER_INDEX_S : MInst<"svld1uw_gather[_{2}base]_index_{d}", "dPul

 // First-faulting load one vector (scalar base)
-def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
+def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
 def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">;
 def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">;
 def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">;
@@ -130,7 +130,7 @@ def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad],
 def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">;

 // First-faulting load one vector (scalar base, VL displacement)
-def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
+def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
 def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">;
 def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">;
 def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">;
@@ -223,7 +223,7 @@ def SVLDFF1SW_GATHER_INDEX_S : MInst<"svldff1sw_gather[_{2}base]_index_{d}", "dP
 def SVLDFF1UW_GATHER_INDEX_S : MInst<"svldff1uw_gather[_{2}base]_index_{d}", "dPul", "lUl", [IsGatherLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1_gather_scalar_offset">;

 // Non-faulting load one vector (scalar base)
-def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
+def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
 def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">;
 def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">;
 def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">;
@@ -232,7 +232,7 @@ def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad],
 def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">;

 // Non-faulting load one vector (scalar base, VL displacement)
-def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
+def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
 def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">;
 def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">;
 def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">;
@@ -258,7 +258,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 }

 // Load one quadword and replicate (scalar base)
-def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
+def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;

 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
   def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
@@ -283,7 +283,7 @@ defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">

 // Load one octoword and replicate (scalar base)
 let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
-  def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">;
+  def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdm", MergeNone, "aarch64_sve_ld1ro">;
 }
 let SVETargetGuard = "sve,f64mm,bf16", SMETargetGuard = InvalidMode in {
   def SVLD1RO_BF16 : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1ro">;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
index 2baba98cc5050..f608b8a8ae302 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
@@ -15,6 +15,20 @@
 #define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
 #endif

+// CHECK-LABEL: @test_svld1ro_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1ro_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svld1ro_mf8(svbool_t pg, mfloat8_t const *base) {
+  return SVE_ACLE_FUNC(svld1ro, _mf8, , )(pg, base);
+}
+
 // CHECK-LABEL: @test_svld1ro_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c
index 9784e1b6b8001..0e7455d413274 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c
@@ -23,6 +23,21 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif

+// CHECK-LABEL: @test_svld1rq_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1rq_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svld1rq_mf8(svbool_t pg, mfloat8_t const *base) MODE_ATTR
+{
+  return SVE_ACLE_FUNC(svld1rq,_mf8,,)(pg, base);
+}
+
 // CHECK-LABEL: @test_svld1rq_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c
index a647eb0469f9d..ba4091660bfae 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c
@@ -14,6 +14,21 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif

+// CHECK-LABEL: @test_svldff1_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svldff1_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svldff1_mf8(svbool_t pg, const mfloat8_t *base)
+{
+  return SVE_ACLE_FUNC(svldff1,_mf8,,)(pg, base);
+}
+
 // CHECK-LABEL: @test_svldff1_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
@@ -197,6 +212,23 @@ svfloat64_t test_svldff1_f64(svbool_t pg, const float64_t *base)
   return SVE_ACLE_FUNC(svldff1,_f64,,)(pg, base);
 }

+// CHECK-LABEL: @test_svldff1_vnum_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svldff1_vnum_mf8u10__SVBool_tPKu6__mfp8l(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
+//
+svmfloat8_t test_svldff1_vnum_mf8(svbool_t pg, mfloat8_t const *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldff1_vnum,_mf8,,)(pg, base, vnum);
+}
+
 // CHECK-LABEL: @test_svldff1_vnum_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c
index 017f960d37061..8e738d839cd85 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c
@@ -14,6 +14,21 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif

+// CHECK-LABEL: @test_svldnf1_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svldnf1_mf8u10__SVBool_tPKu6__mfp8(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svldnf1_mf8(svbool_t pg, mfloat8_t const *base)
+{
+  return SVE_ACLE_FUNC(svldnf1,_mf8,,)(pg, base);
+}
+
 // CHECK-LABEL: @test_svldnf1_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
@@ -197,6 +212,23 @@ svfloat64_t test_svldnf1_f64(svbool_t pg, const float64_t *base)
   return SVE_ACLE_FUNC(svldnf1,_f64,,)(pg, base);
 }

+// CHECK-LABEL: @test_svldnf1_vnum_mf8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svldnf1_vnum_mf8u10__SVBool_tPKu6__mfp8l(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
+//
+svmfloat8_t test_svldnf1_vnum_mf8(svbool_t pg, mfloat8_t const *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnf1_vnum,_mf8,,)(pg, base, vnum);
+}
+
 // CHECK-LABEL: @test_svldnf1_vnum_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]

>From 171463ebb0eca7f2f4d2dc191203568e0bcd2972 Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwa...@arm.com>
Date: Fri, 27 Jun 2025 09:45:50 +0100
Subject: [PATCH 2/2] [CLANG][AArch64] Add mfloat8_t support for more SVE load intrinsics

Add lowering to assembly tests
---
 clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
index f608b8a8ae302..e7520a504b121 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c
@@ -3,6 +3,7 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +f64mm -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +f64mm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

 // REQUIRES: aarch64-registered-target

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
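For context, a minimal caller of the newly enabled _mf8 forms is sketched below. It is
illustrative only (not part of either patch): the function names load_ff8 and load_nf8_vnum
are made up for the example, and it assumes a compiler that includes this change with SVE
enabled (the svldff1/svldnf1 forms need only +sve; svld1ro additionally needs +f64mm).

#include <arm_sve.h>

// First-faulting load of one vector of 8-bit modal floating-point elements (mfloat8_t).
svmfloat8_t load_ff8(svbool_t pg, const mfloat8_t *base) {
  return svldff1_mf8(pg, base);            // or the overloaded form: svldff1(pg, base)
}

// Non-faulting load displaced by vnum whole vectors from base.
svmfloat8_t load_nf8_vnum(svbool_t pg, const mfloat8_t *base, int64_t vnum) {
  return svldnf1_vnum_mf8(pg, base, vnum); // suffixed form exercised by the new tests
}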