https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/70476
>From 9f90ac3383b37e9d2310836527d01a94b6fbadb9 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Fri, 27 Oct 2023 16:09:07 +0100 Subject: [PATCH 1/2] [AArch64] Add SVE2.1 intrinsics for indexed quadword gather loads and scatter stores This patch adds the quadword gather load intrinsics of the form (1) sv<type>_t svld1q_gather_u64index_<typ>(svbool_t, const <type>_t *, svuint64_t); (2) sv<type>_t svld1q_gather_u64base_index_<typ>(svbool_t, svuint64_t, int64_t); and the quadword scatter store intrinsics of the form (3) void svst1q_scatter_u64index_<typ>(svbool_t, <type>_t *, svuint64_t, sv<type>_t); (4) void svst1q_scatter_u64base_index_<typ>(svbool, svuint64_t, int64_t, sv<type>_t); (intrinsics (1) and (3) are currently missing the variants for non 64-bit sized base types, e.g. `int8_t` or `bfloat16_t`, etc). ACLE spec: https://github.com/ARM-software/acle/pull/257 --- clang/include/clang/Basic/arm_sve.td | 12 + .../acle_sve2p1_loads.c | 340 ++++++++++++++++++ .../acle_sve2p1_store.c | 340 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 25 ++ .../Target/AArch64/AArch64ISelLowering.cpp | 38 +- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + ...p1-intrinsics-gather-loads-128bit-index.ll | 249 +++++++++++++ ...-intrinsics-scatter-stores-128bit-index.ll | 248 +++++++++++++ 8 files changed, 1244 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 07d9fc6e04f1b29..ee46a81a942c3f3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -319,6 +319,12 @@ let TargetGuard = "sve2p1" in { defm SVLD2Q_VNUM : StructLoad<"svld2q_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2q_sret">; defm SVLD3Q_VNUM : StructLoad<"svld3q_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3q_sret">; defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret">; + + // Load quadwords (scalar base + vector index) + def SVLD1Q_GATHER_INDICES_U : MInst<"svld1q_gather_[{3}]index[_{0}]", "dPcg", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_index">; + + // Load quadwords (vector base + scalar index) + def SVLD1Q_GATHER_INDEX_S : MInst<"svld1q_gather[_{2}base]_index_{0}", "dPgl", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; } //////////////////////////////////////////////////////////////////////////////// @@ -464,6 +470,12 @@ let TargetGuard = "sve2p1" in { defm SVST2Q_VNUM : StructStore<"svst2q_vnum[_{d}]", "vPcl2", "aarch64_sve_st2q">; defm SVST3Q_VNUM : StructStore<"svst3q_vnum[_{d}]", "vPcl3", "aarch64_sve_st3q">; defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q">; + + // Scatter store quadwords (scalar base + vector index) + def SVST1Q_SCATTER_INDICES_U : MInst<"svst1q_scatter_[{3}]index[_{0}]", "vPpgd", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_index">; + + // Scatter store quadwords (vector base + scalar index) + def SVST1Q_SCATTER_INDEX_S : MInst<"svst1q_scatter[_{2}base]_index[_{0}]", "vPgld", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c index 35e0069e17c136f..44351347e4cf0ae 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c @@ -2528,3 +2528,343 @@ svbfloat16_t test_svld1q_gather_u64base_bf16(svbool_t pg, svuint64_t base) { return SVE_ACLE_FUNC(svld1q_gather,_u64base,_bf16,)(pg, base); } + +// CHECK-LABEL: @test_svld1q_gather_u64index_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_s16u10__SVBool_tPKsu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svint16_t test_svld1q_gather_u64index_s16(svbool_t pg, const int16_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_s16) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_u16u10__SVBool_tPKtu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] +// +svuint16_t test_svld1q_gather_u64index_u16(svbool_t pg, const uint16_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_u16) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_s32u10__SVBool_tPKiu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +svint32_t test_svld1q_gather_u64index_s32(svbool_t pg, const int32_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_s32) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_u32u10__SVBool_tPKju12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] +// +svuint32_t test_svld1q_gather_u64index_u32(svbool_t pg, const uint32_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_u32) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_s64u10__SVBool_tPKlu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +svint64_t test_svld1q_gather_u64index_s64(svbool_t pg, const int64_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_s64) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_u64u10__SVBool_tPKmu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] +// +svuint64_t test_svld1q_gather_u64index_u64(svbool_t pg, const uint64_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_u64) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z32test_svld1q_gather_u64index_bf16u10__SVBool_tPKu6__bf16u12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]] +// +svbfloat16_t test_svld1q_gather_u64index_bf16(svbool_t pg, const bfloat16_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_bf16) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_f16u10__SVBool_tPKDhu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_svld1q_gather_u64index_f16(svbool_t pg, const float16_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_f16) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_f32u10__SVBool_tPKfu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_svld1q_gather_u64index_f32(svbool_t pg, const float32_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_f32) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64index_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z31test_svld1q_gather_u64index_f64u10__SVBool_tPKdu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_svld1q_gather_u64index_f64(svbool_t pg, const float64_t *base, svuint64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather_,u64,index,_f64) (pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_s16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +// +svint16_t test_svld1q_gather_u64base_index_s16(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_s16,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_u16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]] +// +svuint16_t test_svld1q_gather_u64base_index_u16(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_u16,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_s32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +// +svint32_t test_svld1q_gather_u64base_index_s32(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_s32,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_u32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] +// +svuint32_t test_svld1q_gather_u64base_index_u32(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_u32,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_s64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +// +svint64_t test_svld1q_gather_u64base_index_s64(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_s64,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_u64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]] +// +svuint64_t test_svld1q_gather_u64base_index_u64(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_u64,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z37test_svld1q_gather_u64base_index_bf16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]] +// +svbfloat16_t test_svld1q_gather_u64base_index_bf16(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_bf16,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_f16u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +svfloat16_t test_svld1q_gather_u64base_index_f16(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_f16,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_f32u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +svfloat32_t test_svld1q_gather_u64base_index_f32(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_f32,)(pg, base, idx); +} + +// CHECK-LABEL: @test_svld1q_gather_u64base_index_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z36test_svld1q_gather_u64base_index_f64u10__SVBool_tu12__SVUint64_tl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +svfloat64_t test_svld1q_gather_u64base_index_f64(svbool_t pg, svuint64_t base, int64_t idx) { + return SVE_ACLE_FUNC(svld1q_gather,_u64base,_index_f64,)(pg, base, idx); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c index 1fb5933ce75e1e3..137801cc0814a10 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c @@ -2130,3 +2130,343 @@ void test_svst1q_scatter_u64base_bf16(svbool_t pg, svuint64_t base, svbfloat16_t { SVE_ACLE_FUNC(svst1q_scatter, _u64base,,_bf16)(pg, base, data); } + +// CHECK-LABEL: @test_svst1q_scatter_u64index_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s16u10__SVBool_tPsu12__SVUint64_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_s16(svbool_t pg, int16_t *base, svuint64_t idx, svint16_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u16u10__SVBool_tPtu12__SVUint64_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_u16(svbool_t pg, uint16_t *base, svuint64_t idx, svuint16_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s32u10__SVBool_tPiu12__SVUint64_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_s32(svbool_t pg, int32_t *base, svuint64_t idx, svint32_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u32u10__SVBool_tPju12__SVUint64_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_u32(svbool_t pg, uint32_t *base, svuint64_t idx, svuint32_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_s64u10__SVBool_tPlu12__SVUint64_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_s64(svbool_t pg, int64_t *base, svuint64_t idx, svint64_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _s64)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_u64u10__SVBool_tPmu12__SVUint64_tS1_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_u64(svbool_t pg, uint64_t *base, svuint64_t idx, svuint64_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _u64)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z33test_svst1q_scatter_u64index_bf16u10__SVBool_tPu6__bf16u12__SVUint64_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_bf16(svbool_t pg, bfloat16_t *base, svuint64_t idx, svbfloat16_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _bf16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f16u10__SVBool_tPDhu12__SVUint64_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_f16(svbool_t pg, float16_t *base, svuint64_t idx, svfloat16_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f32u10__SVBool_tPfu12__SVUint64_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_f32(svbool_t pg, float32_t *base, svuint64_t idx, svfloat32_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64index_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z32test_svst1q_scatter_u64index_f64u10__SVBool_tPdu12__SVUint64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64index_f64(svbool_t pg, float64_t *base, svuint64_t idx, svfloat64_t data) { + SVE_ACLE_FUNC(svst1q_scatter_, u64, index, _f64)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s16u10__SVBool_tu12__SVUint64_tlu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_s16(svbool_t pg, svuint64_t base, int64_t idx, svint16_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u16u10__SVBool_tu12__SVUint64_tlu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_u16(svbool_t pg, svuint64_t base, int64_t idx, svuint16_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s32u10__SVBool_tu12__SVUint64_tlu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_s32(svbool_t pg, svuint64_t base, int64_t idx, svint32_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u32u10__SVBool_tu12__SVUint64_tlu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_u32(svbool_t pg, svuint64_t base, int64_t idx, svuint32_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_s64u10__SVBool_tu12__SVUint64_tlu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_s64(svbool_t pg, svuint64_t base, int64_t idx, svint64_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_s64)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_u64u10__SVBool_tu12__SVUint64_tlS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_u64(svbool_t pg, svuint64_t base, int64_t idx, svuint64_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_u64)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z38test_svst1q_scatter_u64base_index_bf16u10__SVBool_tu12__SVUint64_tlu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_bf16(svbool_t pg, svuint64_t base, int64_t idx, svbfloat16_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_bf16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f16u10__SVBool_tu12__SVUint64_tlu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_f16(svbool_t pg, svuint64_t base, int64_t idx, svfloat16_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f16)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f32u10__SVBool_tu12__SVUint64_tlu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_f32(svbool_t pg, svuint64_t base, int64_t idx, svfloat32_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f32)(pg, base, idx, data); +} + +// CHECK-LABEL: @test_svst1q_scatter_u64base_index_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z37test_svst1q_scatter_u64base_index_f64u10__SVBool_tu12__SVUint64_tlu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[IDX:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 1 x i1> [[TMP0]], <vscale x 2 x i64> [[BASE:%.*]], i64 [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1q_scatter_u64base_index_f64(svbool_t pg, svuint64_t base, int64_t idx, svfloat64_t data) { + SVE_ACLE_FUNC(svst1q_scatter,_u64base,_index,_f64)(pg, base, idx, data); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 35129d6b6c16bdc..a558a1eca84af6c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1466,6 +1466,15 @@ class AdvSIMD_GatherLoadQ_VS_Intrinsic ], [IntrReadMem]>; +class AdvSIMD_GatherLoadQ_SV_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [ + llvm_nxv1i1_ty, + llvm_ptr_ty, + llvm_nxv2i64_ty + ], + [IntrReadMem, IntrArgMemOnly]>; + class AdvSIMD_GatherLoad_VS_WriteFFR_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [ @@ -1514,6 +1523,16 @@ class AdvSIMD_ScatterStoreQ_VS_Intrinsic ], [IntrWriteMem]>; +class AdvSIMD_ScatterStoreQ_SV_Intrinsic + : DefaultAttrsIntrinsic<[], + [ + llvm_anyvector_ty, + llvm_nxv1i1_ty, + llvm_ptr_ty, + llvm_nxv2i64_ty + ], + [IntrWriteMem, IntrArgMemOnly]>; + class SVE_gather_prf_SV : DefaultAttrsIntrinsic<[], [ @@ -2144,6 +2163,9 @@ def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsi def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic; def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic; +// 128-bit loads, scaled offsets (indices) +def int_aarch64_sve_ld1q_gather_index : AdvSIMD_GatherLoadQ_SV_Intrinsic; + // // Gather loads: vector base + scalar offset // @@ -2222,6 +2244,9 @@ def int_aarch64_sve_st1_scatter_sxtw_index def int_aarch64_sve_st1_scatter_uxtw_index : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic; +// 128-bit stores, scaled offsets (indices) +def int_aarch64_sve_st1q_scatter_index : AdvSIMD_ScatterStoreQ_SV_Intrinsic; + // // Scatter stores: vector base + scalar offset // diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f744643a9d9b388..8d37d91013b8641 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2579,6 +2579,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1Q_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) @@ -2604,6 +2605,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) MAKE_CASE(AArch64ISD::SST1Q_PRED) + MAKE_CASE(AArch64ISD::SST1Q_INDEX_PRED) MAKE_CASE(AArch64ISD::ST1_PRED) MAKE_CASE(AArch64ISD::SST1_PRED) MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) @@ -22761,10 +22763,11 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // For FPs, ACLE only supports _packed_ single and double precision types. - // SST1Q_PRED is the ST1Q for sve2p1 and should allow all sizes + // SST1Q_[INDEX_]PRED is the ST1Q for sve2p1 and should allow all sizes. if (SrcElVT.isFloatingPoint()) if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64) && - (Opcode != AArch64ISD::SST1Q_PRED || + ((Opcode != AArch64ISD::SST1Q_PRED && + Opcode != AArch64ISD::SST1Q_INDEX_PRED) || ((SrcVT != MVT::nxv8f16) && (SrcVT != MVT::nxv8bf16)))) return SDValue(); @@ -22782,6 +22785,10 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, Offset = getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits()); Opcode = AArch64ISD::SSTNT1_PRED; + } else if (Opcode == AArch64ISD::SST1Q_INDEX_PRED) { + Offset = + getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits()); + Opcode = AArch64ISD::SST1Q_PRED; } // In the case of non-temporal gather loads there's only one SVE instruction @@ -22789,7 +22796,8 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0] // Since we do have intrinsics that allow the arguments to be in a different // order, we may need to swap them to match the spec. - if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector()) + if ((Opcode == AArch64ISD::SSTNT1_PRED || Opcode == AArch64ISD::SST1Q_PRED) && + Offset.getValueType().isVector()) std::swap(Base, Offset); // SST1_IMM requires that the offset is an immediate that is: @@ -22872,21 +22880,26 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, // vector of offsets (that fits into one register) SDValue Offset = N->getOperand(4); - // For "scalar + vector of indices", just scale the indices. This only - // applies to non-temporal gathers because there's no instruction that takes - // indicies. + // For "scalar + vector of indices", scale the indices to obtain unscaled + // offsets. This applies to non-temporal and quadword gathers, which do not + // have an addressing mode with scaled offset. if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) { Offset = getScaledOffsetForBitWidth(DAG, Offset, DL, RetVT.getScalarSizeInBits()); Opcode = AArch64ISD::GLDNT1_MERGE_ZERO; + } else if (Opcode == AArch64ISD::GLD1Q_INDEX_MERGE_ZERO) { + Offset = getScaledOffsetForBitWidth(DAG, Offset, DL, + RetVT.getScalarSizeInBits()); + Opcode = AArch64ISD::GLD1Q_MERGE_ZERO; } - // In the case of non-temporal gather loads there's only one SVE instruction - // per data-size: "scalar + vector", i.e. - // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0] + // In the case of non-temporal gather loads and quadword gather loads there's + // only one addressing mode : "vector + scalar", e.g. + // ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0] // Since we do have intrinsics that allow the arguments to be in a different // order, we may need to swap them to match the spec. - if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO && + if ((Opcode == AArch64ISD::GLDNT1_MERGE_ZERO || + Opcode == AArch64ISD::GLD1Q_MERGE_ZERO) && Offset.getValueType().isVector()) std::swap(Base, Offset); @@ -23736,6 +23749,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO); case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset: return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1Q_MERGE_ZERO); + case Intrinsic::aarch64_sve_ld1q_gather_index: + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLD1Q_INDEX_MERGE_ZERO); case Intrinsic::aarch64_sve_ld1_gather_index: return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SCALED_MERGE_ZERO); @@ -23781,6 +23797,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, AArch64ISD::GLDFF1_IMM_MERGE_ZERO); case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset: return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_PRED); + case Intrinsic::aarch64_sve_st1q_scatter_index: + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1Q_INDEX_PRED); case Intrinsic::aarch64_sve_st1_scatter: return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED); case Intrinsic::aarch64_sve_st1_scatter_index: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index b4e89297ad58780..169b0dbab65cdca 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -376,6 +376,7 @@ enum NodeType : unsigned { GLD1_SXTW_SCALED_MERGE_ZERO, GLD1_IMM_MERGE_ZERO, GLD1Q_MERGE_ZERO, + GLD1Q_INDEX_MERGE_ZERO, // Signed gather loads GLD1S_MERGE_ZERO, @@ -421,6 +422,7 @@ enum NodeType : unsigned { SST1_SXTW_SCALED_PRED, SST1_IMM_PRED, SST1Q_PRED, + SST1Q_INDEX_PRED, // Non-temporal scatter store SSTNT1_PRED, diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll new file mode 100644 index 000000000000000..4ad13ee97f010e2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>) + +define <vscale x 8 x i16> @test_svld1q_gather_u64index_s16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 8 x i16> %0 +} + +define <vscale x 8 x i16> @test_svld1q_gather_u64index_u16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 8 x i16> %0 +} + +define <vscale x 4 x i32> @test_svld1q_gather_u64index_s32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 4 x i32> %0 +} + +define <vscale x 4 x i32> @test_svld1q_gather_u64index_u32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 4 x i32> %0 +} + +define <vscale x 2 x i64> @test_svld1q_gather_u64index_s64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 2 x i64> %0 +} + +define <vscale x 2 x i64> @test_svld1q_gather_u64index_u64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 2 x i64> %0 +} + +define <vscale x 8 x bfloat> @test_svld1q_gather_u64index_bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 8 x bfloat> %0 +} + +define <vscale x 8 x half> @test_svld1q_gather_u64index_f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 8 x half> %0 +} + +define <vscale x 4 x float> @test_svld1q_gather_u64index_f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 4 x float> %0 +} + +define <vscale x 2 x double> @test_svld1q_gather_u64index_f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) { +; CHECK-LABEL: test_svld1q_gather_u64index_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x0] +; CHECK-NEXT: ret +entry: + %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret <vscale x 2 x double> %0 +} + +define <vscale x 8 x i16> @test_svld1q_gather_u64base_index_s16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 8 x i16> %1 +} + +define <vscale x 8 x i16> @test_svld1q_gather_u64base_index_u16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 8 x i16> %1 +} + +define <vscale x 4 x i32> @test_svld1q_gather_u64base_index_s32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 4 x i32> %1 +} + +define <vscale x 4 x i32> @test_svld1q_gather_u64base_index_u32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 4 x i32> %1 +} + +define <vscale x 2 x i64> @test_svld1q_gather_u64base_index_s64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 2 x i64> %1 +} + +define <vscale x 2 x i64> @test_svld1q_gather_u64base_index_u64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 2 x i64> %1 +} + +define <vscale x 8 x bfloat> @test_svld1q_gather_u64base_index_bf16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + %1 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 8 x bfloat> %1 +} + +define <vscale x 8 x half> @test_svld1q_gather_u64base_index_f16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + %1 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 8 x half> %1 +} + +define <vscale x 4 x float> @test_svld1q_gather_u64base_index_f32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + %1 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 4 x float> %1 +} + +define <vscale x 2 x double> @test_svld1q_gather_u64base_index_f64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) { +; CHECK-LABEL: test_svld1q_gather_u64base_index_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: ld1q { z0.q }, p0/z, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + %1 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret <vscale x 2 x double> %1 +} + diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll new file mode 100644 index 000000000000000..29c9c6a23235e15 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv16i8.nxv2i64(<vscale x 16 x i8>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat>, <vscale x 1 x i1>, <vscale x 2 x i64>, i64) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) +declare void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 1 x i1>, ptr, <vscale x 2 x i64>) + +define void @test_svst1q_scatter_u64index_s16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 8 x i16> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_u16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 8 x i16> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_s32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 4 x i32> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_u32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 4 x i32> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4i32(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_s64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 2 x i64> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_u64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 2 x i64> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 8 x bfloat> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 8 x half> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv8f16(<vscale x 8 x half> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 4 x float> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv4f32(<vscale x 4 x float> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64index_f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx, <vscale x 2 x double> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64index_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl z0.d, z0.d, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x0] +; CHECK-NEXT: ret +entry: + tail call void @llvm.aarch64.sve.st1q.scatter.index.nxv2f64(<vscale x 2 x double> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) + ret void +} + +define void @test_svst1q_scatter_u64base_index_s16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 8 x i16> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_s16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_u16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 8 x i16> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_u16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8i16.nxv2i64(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_s32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 4 x i32> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_u32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 4 x i32> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_u32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(<vscale x 4 x i32> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_s64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 2 x i64> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_s64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_u64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 2 x i64> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_u64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_bf16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 8 x bfloat> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_bf16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8bf16.nxv2i64(<vscale x 8 x bfloat> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_f16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 8 x half> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 1 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv8f16.nxv2i64(<vscale x 8 x half> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_f32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 4 x float> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 2 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4f32.nxv2i64(<vscale x 4 x float> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} + +define void @test_svst1q_scatter_u64base_index_f64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx, <vscale x 2 x double> %data) { +; CHECK-LABEL: test_svst1q_scatter_u64base_index_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x0, #3 +; CHECK-NEXT: st1q { z1.q }, p0, [z0.d, x8] +; CHECK-NEXT: ret +entry: + %0 = shl i64 %idx, 3 + tail call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x double> %data, <vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0) + ret void +} >From c2fa16434e3453da082104ebe5b55367286d471d Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Fri, 10 Nov 2023 15:34:12 +0000 Subject: [PATCH 2/2] Fixup patch --- clang/include/clang/Basic/arm_sve.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index ee46a81a942c3f3..a1ac926ab9577bb 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -321,10 +321,10 @@ let TargetGuard = "sve2p1" in { defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret">; // Load quadwords (scalar base + vector index) - def SVLD1Q_GATHER_INDICES_U : MInst<"svld1q_gather_[{3}]index[_{0}]", "dPcg", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_index">; + def SVLD1Q_GATHER_INDICES_U : MInst<"svld1q_gather_[{3}]index[_{d}]", "dPcg", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_index">; // Load quadwords (vector base + scalar index) - def SVLD1Q_GATHER_INDEX_S : MInst<"svld1q_gather[_{2}base]_index_{0}", "dPgl", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_INDEX_S : MInst<"svld1q_gather[_{2}base]_index_{d}", "dPgl", "sUsiUilUlbhfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; } //////////////////////////////////////////////////////////////////////////////// @@ -472,10 +472,10 @@ let TargetGuard = "sve2p1" in { defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q">; // Scatter store quadwords (scalar base + vector index) - def SVST1Q_SCATTER_INDICES_U : MInst<"svst1q_scatter_[{3}]index[_{0}]", "vPpgd", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_index">; + def SVST1Q_SCATTER_INDICES_U : MInst<"svst1q_scatter_[{3}]index[_{d}]", "vPpgd", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_index">; // Scatter store quadwords (vector base + scalar index) - def SVST1Q_SCATTER_INDEX_S : MInst<"svst1q_scatter[_{2}base]_index[_{0}]", "vPgld", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_INDEX_S : MInst<"svst1q_scatter[_{2}base]_index[_{d}]", "vPgld", "sUsiUilUlbhfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; } //////////////////////////////////////////////////////////////////////////////// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits