https://github.com/jf-botto updated https://github.com/llvm/llvm-project/pull/116422
>From 75cc7d90fa8a7f0cde0df969577556ac1098256b Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto...@ucl.ac.uk>
Date: Fri, 15 Nov 2024 18:56:54 +0000
Subject: [PATCH 1/4] Making Clang emit llvm.vector.reverse instead of llvm.aarch64.sve.rev

---
 clang/include/clang/Basic/arm_sve.td          |  2 +-
 .../AArch64/sve-intrinsics/acle_sve_rev.c     | 44 +++++++++----------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a37..c954a6582171728 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1060,7 +1060,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
 defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">;
 defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">;
-def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
 def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
 def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 3c0ae7df79644fa..835d1c616aebcb0 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -24,12 +24,12 @@
 // CHECK-LABEL: @test_svrev_s8(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svrev_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
@@ -39,12 +39,12 @@
 // CHECK-LABEL: @test_svrev_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_s16u11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
@@ -54,12 +54,12 @@ svint16_t
test_svrev_s16(svint16_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // svint32_t test_svrev_s32(svint32_t op) MODE_ATTR @@ -69,12 +69,12 @@ svint32_t test_svrev_s32(svint32_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]] // svint64_t test_svrev_s64(svint64_t op) MODE_ATTR @@ -84,12 +84,12 @@ svint64_t test_svrev_s64(svint64_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z13test_svrev_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR @@ -99,12 +99,12 @@ svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] // svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR @@ -114,12 +114,12 @@ svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] // svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR @@ -129,12 +129,12 @@ svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]] // svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR @@ -144,12 +144,12 @@ svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] // svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR @@ -159,12 +159,12 @@ svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]]) // CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svrev_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] // svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR @@ -174,12 +174,12 @@ svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR // CHECK-LABEL: @test_svrev_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]]) +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_f64u13__SVFloat64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR

>From 8386dde658d8572d943a09661e584af9f71b1158 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto...@ucl.ac.uk>
Date: Mon, 27 Jan 2025 20:21:17 +0000
Subject: [PATCH 2/4] Making Clang emit llvm.vector.reverse instead of llvm.aarch64.sve.rev for svrev_bf16 and svrev_b8

---
 clang/include/clang/Basic/arm_sve.td                          | 4 ++--
 .../test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c | 4 ++--
 clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c954a6582171728..5b7c64490fff4ec 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1082,7 +1082,7 @@ def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNo
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
-def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
 def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
 def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>;
@@ -1093,7 +1093,7 @@ def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve
 def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>;
 }
-def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone, VerifyRuntimeMode]>;
 def SVREV_B32 : SInst<"svrev_b32", "PP", "Pc", MergeNone, "aarch64_sve_rev_b32", [IsOverloadNone, VerifyRuntimeMode]>;
 def SVREV_B64 : SInst<"svrev_b64", "PP", "Pc", MergeNone, "aarch64_sve_rev_b64", [IsOverloadNone, VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
index 9b3e813fa969472..43c0da842001c88 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
@@ -25,12 +25,12 @@
 // CHECK-LABEL: @test_svrev_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
 // CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svrev_bf16u14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 835d1c616aebcb0..856f76e67afcf5a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -189,12 +189,12 @@ svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
 // CHECK-LABEL: @test_svrev_b8(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
 // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svrev_b8u10__SVBool_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svrev_b8(svbool_t op) MODE_ATTR

>From a10ac09fafbafe5fc3798437a5ebc940bf81022e Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto...@ucl.ac.uk>
Date: Wed, 5 Feb 2025 00:00:34 +0000
Subject: [PATCH 3/4] Precommitting missing optimisation tests

---
 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll | 182 +++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll

diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
new file mode 100644
index 000000000000000..8455f2e5118ef7b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.b, p0.b
+; CHECK-NEXT: rev p0.b, p0.b
+; CHECK-NEXT: ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b16_inv:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: rev p0.h, p0.h
+; CHECK-NEXT: ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b32_inv:
+; CHECK: // %bb.0:
// %entry +; CHECK-NEXT: rev p0.s, p0.s +; CHECK-NEXT: rev p0.s, p0.s +; CHECK-NEXT: ret +entry: + %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0) + %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %1) + ret <vscale x 16 x i1> %2 +} + +define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) { +; CHECK-LABEL: aarch64_sve_rev_b64_inv: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev p0.d, p0.d +; CHECK-NEXT: rev p0.d, p0.d +; CHECK-NEXT: ret +entry: + %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0) + %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %1) + ret <vscale x 16 x i1> %2 +} + +define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { +; CHECK-LABEL: aarch64_sve_revb_inv: +; CHECK: // %bb.0: +; CHECK-NEXT: revb z0.s, p0/m, z1.s +; CHECK-NEXT: revb z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) { +; CHECK-LABEL: aarch64_sve_revd_inv: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: ret + %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) + %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %2 +} + +define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { +; CHECK-LABEL: aarch64_sve_revh_inv: +; CHECK: // %bb.0: +; CHECK-NEXT: revh z0.s, p0/m, z1.s +; CHECK-NEXT: revh z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) { +; CHECK-LABEL: aarch64_sve_revw_inv: +; CHECK: // %bb.0: +; CHECK-NEXT: revw z0.d, p0/m, z1.d +; CHECK-NEXT: revw z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) + %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %2 +} + +define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) { +; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revb z0.s, p0/m, z1.s +; CHECK-NEXT: revb z0.s, p1/m, z1.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale 
x 4 x i32> %b) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) { +; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revb z0.s, p0/m, z1.s +; CHECK-NEXT: revb z0.s, p0/m, z2.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) { +; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: revd z0.q, p1/m, z1.q +; CHECK-NEXT: ret + %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) + %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %2 +} + +define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) { +; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revd z0.q, p0/m, z1.q +; CHECK-NEXT: revd z0.q, p0/m, z2.q +; CHECK-NEXT: ret + %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) + %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b1) + ret <vscale x 16 x i8> %2 +} + +define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) { +; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revh z0.s, p0/m, z1.s +; CHECK-NEXT: revh z0.s, p1/m, z1.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) { +; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revh z0.s, p0/m, z1.s +; CHECK-NEXT: revh z0.s, p0/m, z2.s +; CHECK-NEXT: ret + %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) + %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1) + ret <vscale x 4 x i32> %2 +} + +define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) { +; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: revw z0.d, p0/m, z1.d +; CHECK-NEXT: revw z0.d, p1/m, z1.d +; CHECK-NEXT: ret + %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x 
i1> %pg, <vscale x 2 x i64> %b)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: revw z0.d, p0/m, z1.d
+; CHECK-NEXT: revw z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b1)
+  ret <vscale x 2 x i64> %2
+}

>From 5a1829ff82467df54ef4f3dcbb65190a1aaa8fdd Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto...@ucl.ac.uk>
Date: Thu, 6 Feb 2025 01:04:42 +0000
Subject: [PATCH 4/4] Adding missed optimisation

---
 .../Target/AArch64/AArch64ISelLowering.cpp   | 38 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll | 24 ++++--------
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 34464d317beafe4..323fbde74bf1974 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue foldRevInvolution(SDNode *N) {
+  SDValue InnerRev = N->getOperand(1);
+  if (!InnerRev.hasOneUse())
+    return SDValue();
+
+  unsigned OuterIId = getIntrinsicID(N);
+  unsigned InnerIId = getIntrinsicID(InnerRev.getNode());
+  if (OuterIId != InnerIId)
+    return SDValue();
+
+  switch (OuterIId) {
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    if (N->getOperand(2) != InnerRev.getOperand(2) ||
+        N->getOperand(3) != InnerRev.getOperand(3))
+      return SDValue();
+    [[fallthrough]];
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+    return InnerRev.getOperand(1);
+  default:
+    return SDValue();
+  }
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -22270,6 +22299,15 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
   case Intrinsic::aarch64_sve_cmpls_wide:
     return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    return foldRevInvolution(N);
   case Intrinsic::aarch64_sve_ptest_any:
     return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
                     AArch64CC::ANY_ACTIVE);
diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
index 8455f2e5118ef7b..984845363501b24 100644
--- a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -4,8 +4,6 @@ define <vscale x 16 x i1>
@aarch64_sve_rev_inv(<vscale x 16 x i1> %0) { ; CHECK-LABEL: aarch64_sve_rev_inv: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rev p0.b, p0.b -; CHECK-NEXT: rev p0.b, p0.b ; CHECK-NEXT: ret entry: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0) @@ -16,8 +14,6 @@ entry: define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) { ; CHECK-LABEL: aarch64_sve_rev_b16_inv: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rev p0.h, p0.h -; CHECK-NEXT: rev p0.h, p0.h ; CHECK-NEXT: ret entry: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0) @@ -28,8 +24,6 @@ entry: define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) { ; CHECK-LABEL: aarch64_sve_rev_b32_inv: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rev p0.s, p0.s -; CHECK-NEXT: rev p0.s, p0.s ; CHECK-NEXT: ret entry: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0) @@ -40,8 +34,6 @@ entry: define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) { ; CHECK-LABEL: aarch64_sve_rev_b64_inv: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rev p0.d, p0.d -; CHECK-NEXT: rev p0.d, p0.d ; CHECK-NEXT: ret entry: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0) @@ -52,8 +44,6 @@ entry: define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { ; CHECK-LABEL: aarch64_sve_revb_inv: ; CHECK: // %bb.0: -; CHECK-NEXT: revb z0.s, p0/m, z1.s -; CHECK-NEXT: revb z0.s, p0/m, z1.s ; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) @@ -63,8 +53,6 @@ define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) { ; CHECK-LABEL: aarch64_sve_revd_inv: ; CHECK: // %bb.0: -; CHECK-NEXT: revd z0.q, p0/m, z1.q -; CHECK-NEXT: revd z0.q, p0/m, z1.q ; CHECK-NEXT: ret %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) @@ -74,8 +62,6 @@ define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { ; CHECK-LABEL: aarch64_sve_revh_inv: ; CHECK: // %bb.0: -; CHECK-NEXT: revh z0.s, p0/m, z1.s -; CHECK-NEXT: revh z0.s, p0/m, z1.s ; CHECK-NEXT: ret %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) @@ -85,14 +71,13 @@ define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) { ; CHECK-LABEL: aarch64_sve_revw_inv: ; CHECK: // %bb.0: -; CHECK-NEXT: revw z0.d, p0/m, z1.d -; CHECK-NEXT: revw z0.d, p0/m, z1.d ; CHECK-NEXT: ret %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 
2 x i64> %b) %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %2 } +; negative test define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) { ; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch: ; CHECK: // %bb.0: @@ -104,6 +89,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> ret <vscale x 4 x i32> %2 } +; negative test define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) { ; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch: ; CHECK: // %bb.0: @@ -115,6 +101,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> % ret <vscale x 4 x i32> %2 } +; negative test define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) { ; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch: ; CHECK: // %bb.0: @@ -126,6 +113,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> ret <vscale x 16 x i8> %2 } +; negative test define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) { ; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch: ; CHECK: // %bb.0: @@ -137,6 +125,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> % ret <vscale x 16 x i8> %2 } +; negative test define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) { ; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch: ; CHECK: // %bb.0: @@ -148,6 +137,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> ret <vscale x 4 x i32> %2 } +; negative test define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) { ; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch: ; CHECK: // %bb.0: @@ -159,6 +149,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> % ret <vscale x 4 x i32> %2 } +; negative test define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) { ; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch: ; CHECK: // %bb.0: @@ -170,6 +161,7 @@ define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> ret <vscale x 2 x i64> %2 } +; negative test define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) { ; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch: ; CHECK: // %bb.0: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
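As a reader's note (not part of the patch series above): with svrev now lowering to llvm.vector.reverse and the new DAG combine covering the remaining aarch64_sve_rev* intrinsics, two back-to-back reversals of the same value should cancel. The sketch below is illustrative only; the function name double_rev is a placeholder, and it assumes <arm_sve.h> plus an SVE-enabled target (e.g. -march=armv8-a+sve).

  /* Illustrative sketch, not taken from the patch. */
  #include <arm_sve.h>

  svint32_t double_rev(svint32_t x) {
    /* svrev_s32 now lowers to llvm.vector.reverse, so reversing twice is
       expected to fold away and return x unchanged. */
    return svrev_s32(svrev_s32(x));
  }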