https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/165545
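For reviewers who want the user-level view before reading the diff: below is a minimal usage sketch of the new multi-vector AES ACLE intrinsics this series adds. The signatures are taken from the Clang test in the patch; the key-lane description follows the operands of the underlying aesd/aesdimc instructions rather than anything stated in the patch, and the helper function itself is hypothetical. It assumes a compiler with this change applied and the +sve-aes2 target feature enabled (plus +ssve-aes when used in streaming mode), as in the test RUN lines.

  #include <arm_sve.h>

  // Illustrative only: two AES decryption steps on a pair of SVE vectors
  // using the new x2 intrinsics. 'key' supplies 128-bit round keys; the
  // trailing immediate selects which quadword lane of 'key' is used,
  // mirroring the z2.q[imm] operand of the underlying instructions.
  svuint8x2_t decrypt_steps(svuint8x2_t state, svuint8_t key) {
    state = svaesd_laneq_u8_x2(state, key, 0);    // AESD step, key lane 0
    state = svaesdimc_laneq_u8_x2(state, key, 0); // AESD + inverse MixColumns
    return state;
  }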
>From 44338e5c9cb5edf233cc6bd5f80e76fbd27098a0 Mon Sep 17 00:00:00 2001
From: Marian Lukac <[email protected]>
Date: Mon, 27 Oct 2025 12:04:35 +0000
Subject: [PATCH 1/3] [AArch64] Add intrinsics support for 9.6 crypto

---
 clang/include/clang/Basic/arm_sve.td           |  17 ++
 .../sve2p1-intrinsics/acle_sve2p1_crypto.c     | 215 ++++++++++++++++++
 clang/utils/TableGen/SveEmitter.cpp            |   9 +-
 llvm/include/llvm/IR/IntrinsicsAArch64.td      |  27 +++
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp     |  64 +++++-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td  |   4 +-
 .../AArch64/sve2p1-intrinsics-crypto.ll        | 155 +++++++++++++
 7 files changed, 477 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..605cf93d196ff 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1841,6 +1841,23 @@ def SVPMULLT_PAIR_U64 : SInst<"svpmullt_pair[_{d}]", "ddd", "Ul", MergeNone,
 def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>;
 }
 
+let SVETargetGuard = "sve-aes2", SMETargetGuard = "sve-aes2,ssve-aes" in {
+def SVAESD_X2 : SInst<"svaesd_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC_X2 : SInst<"svaesdimc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE_X2 : SInst<"svaese_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESeMC_X2 : SInst<"svaesemc_laneq[_{d}_x2]", "22di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x2", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVAESD_X4 : SInst<"svaesd_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesd_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESIMC_X4 : SInst<"svaesdimc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesdimc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESE_X4 : SInst<"svaese_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVAESeMC_X4 : SInst<"svaesemc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>;
+
+def SVPMULL_U64 : SInst<"svpmull[_u64]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMULL_N_U64 : SInst<"svpmull[_n_u64]", "2da", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_U64 : SInst<"svpmlal[_u64]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>;
+def SVPMLAL_N_U64 : SInst<"svpmlal[_n_u64]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>;
+}
+
 let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in {
 def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>;
 }
diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
new file mode 100644
index 0000000000000..bbe71d9db92cf
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c
@@ -0,0 +1,215 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sve-aes2 -target-feature +ssve-aes -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve-aes2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: @test_svaesd_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x211svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaesdimc_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x211svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaese_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x211svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaese_laneq,_u8_x2,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaesemc_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x211svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { 
<vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaesd_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svaesd_u8_x411svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaesdimc_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svaesdimc_u8_x411svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaese_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svaese_u8_x411svuint8x4_tu11__SVUint8_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaese_laneq,_u8_x4,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svaesemc_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svaesemc_u8_x411svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> [[OP1_COERCE0:%.*]], <vscale x 16 x i8> [[OP1_COERCE1:%.*]], <vscale x 16 x i8> [[OP1_COERCE2:%.*]], <vscale x 16 x i8> [[OP1_COERCE3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0) +// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]] +// +svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4,,)(op1, op2, 0); +} + +// CHECK-LABEL: @test_svpmull_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svpmull_u64u12__SVUint64_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +svuint64x2_t test_svpmull_u64(svuint64_t op1, svuint64_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svpmull,_u64,,)(op1, op2); +} + +// CHECK-LABEL: @test_svpmull_n_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svpmull_n_u64u12__SVUint64_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 +// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 
x i64> poison, <vscale x 2 x i32> zeroinitializer +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +svuint64x2_t test_svpmull_n_u64(svuint64_t op1, uint64_t op2) STREAMING +{ + return SVE_ACLE_FUNC(svpmull,_n_u64,,)(op1, op2); +} + +// CHECK-LABEL: @test_svpmlal_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) +// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svpmlal_u6412svuint64x2_tu12__SVUint64_tS0_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +svuint64x2_t test_svpmlal_u64(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING +{ + return SVE_ACLE_FUNC(svpmlal,_u64,,)(op1, op2, op3); +} + +// CHECK-LABEL: @test_svpmlal_n_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svpmlal_n_u6412svuint64x2_tu12__SVUint64_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0 +// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] +// +svuint64x2_t test_svpmlal_n_u64(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING +{ + return SVE_ACLE_FUNC(svpmlal,_n_u64,,)(op1, op2, op3); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index b1e94e0ce0975..bbeed4402da1b 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -272,9 +272,16 @@ class Intrinsic { Proto[I] == 'R' || Proto[I] == '@' || Proto[I] == '!') break; + if (Proto[I] == '2') + Param += 1; + if (Proto[I] == '4') + Param += 3; + // Multivector modifier can be skipped - if (Proto[I] == '.') + if (Proto[I] == '.') { + Param -= 1; // Adjust for the increment at the top of the loop I += 2; + } } assert(I != Proto.size() && "Prototype has no splat operand"); return Param; 
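Before the LLVM-side half of the patch, a similar hedged usage sketch for the new polynomial multiply intrinsics. Signatures follow the Clang test above; note that the last patch in this series renames the builtins and intrinsics with an _x2 suffix (svpmull_u64_x2, svpmlal_u64_x2, aarch64_sve_pmull_x2, aarch64_sve_pmlal_x2), which is the spelling used here. The helper functions and the product-layout comment are illustrative, not taken from the patch.

  #include <arm_sve.h>

  // Illustrative only: widening carry-less (polynomial) multiply. Each pair
  // of 64-bit source elements yields a 128-bit product, returned as a
  // two-vector tuple (the underlying pmull writes { zd.q, zd+1.q }).
  svuint64x2_t poly_mul(svuint64_t a, svuint64_t b) {
    return svpmull_u64_x2(a, b);
  }

  // Accumulating form: products are accumulated into the two-vector tuple;
  // the _n_ variant splats a scalar operand before multiplying.
  svuint64x2_t poly_mul_acc(svuint64x2_t acc, svuint64_t a, svuint64_t b,
                            uint64_t k) {
    acc = svpmlal_u64_x2(acc, a, b);
    return svpmlal_n_u64_x2(acc, a, k);
  }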
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b81edc385cd43..71949c93f83b0 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -4194,4 +4194,31 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic; def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic; + + // AES2 + class SVE2_Crypto_LANE_X2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], + [ImmArg<ArgIndex<3>>, IntrNoMem]>; + class SVE2_Crypto_LANE_X4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, + llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], + [ImmArg<ArgIndex<5>>, IntrNoMem]>; + + def int_aarch64_sve_aesd_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic; + def int_aarch64_sve_aesdimc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic; + def int_aarch64_sve_aese_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic; + def int_aarch64_sve_aesemc_laneq_x2 : SVE2_Crypto_LANE_X2_Intrinsic; + + def int_aarch64_sve_aesd_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic; + def int_aarch64_sve_aesdimc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic; + def int_aarch64_sve_aese_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic; + def int_aarch64_sve_aesemc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic; + + def int_aarch64_sve_pmull : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], + [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; + def int_aarch64_sve_pmlal : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty], + [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; } + diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index e7b2d20e2a6cb..2655aadb26c19 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1974,26 +1974,28 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, SDLoc DL(N); EVT VT = N->getValueType(0); unsigned FirstVecIdx = HasPred ? 2 : 1; + SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID + SmallVector<SDValue, 4> Ops; - auto GetMultiVecOperand = [=](unsigned StartIdx) { - SmallVector<SDValue, 4> Regs(N->ops().slice(StartIdx, NumVecs)); + auto GetMultiVecOperand = [&]() { + SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs); + OpsIter += NumVecs; return createZMulTuple(Regs); }; - SDValue Zdn = GetMultiVecOperand(FirstVecIdx); + if (HasPred) + Ops.push_back(*OpsIter++); - SDValue Zm; + Ops.push_back(GetMultiVecOperand()); if (IsZmMulti) - Zm = GetMultiVecOperand(NumVecs + FirstVecIdx); + Ops.push_back(GetMultiVecOperand()); else - Zm = N->getOperand(NumVecs + FirstVecIdx); + Ops.push_back(*OpsIter++); + // Append any remaining operands. 
+ Ops.append(OpsIter, N->op_end()); SDNode *Intrinsic; - if (HasPred) - Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, - N->getOperand(1), Zdn, Zm); - else - Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm); + Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops); SDValue SuperReg = SDValue(Intrinsic, 0); for (unsigned i = 0; i < NumVecs; ++i) ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( @@ -6248,6 +6250,46 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; + case Intrinsic::aarch64_sve_aese_laneq_x2: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B); + return; + case Intrinsic::aarch64_sve_aesd_laneq_x2: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B); + return; + case Intrinsic::aarch64_sve_aesemc_laneq_x2: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B); + return; + case Intrinsic::aarch64_sve_aesdimc_laneq_x2: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B); + return; + case Intrinsic::aarch64_sve_aese_laneq_x4: + SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B); + return; + case Intrinsic::aarch64_sve_aesd_laneq_x4: + SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B); + return; + case Intrinsic::aarch64_sve_aesemc_laneq_x4: + SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B); + return; + case Intrinsic::aarch64_sve_aesdimc_laneq_x4: + SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B); + return; + case Intrinsic::aarch64_sve_pmlal: + SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q); + return; + case Intrinsic::aarch64_sve_pmull: { + SDLoc DL(Node); + SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2)); + SDNode *Res = + CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs); + SDValue SuperReg = SDValue(Res, 0); + for (unsigned I = 0; I < 2; I++) + ReplaceUses(SDValue(Node, I), + CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT, + SuperReg)); + CurDAG->RemoveDeadNode(Node); + return; + } case Intrinsic::aarch64_sve_fcvtzs_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); return; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 3b268dcbca600..7098ac57073cd 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4219,12 +4219,12 @@ let Predicates = [HasSVEAES2, HasNonStreamingSVE_or_SSVE_AES] in { def AESE_2ZZI_B : sve_crypto_binary_multi2<0b000, "aese">; def AESD_2ZZI_B : sve_crypto_binary_multi2<0b010, "aesd">; def AESEMC_2ZZI_B : sve_crypto_binary_multi2<0b100, "aesemc">; - def AESDMIC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">; + def AESDIMC_2ZZI_B : sve_crypto_binary_multi2<0b110, "aesdimc">; // SVE_AES2 multi-vector instructions (x4) def AESE_4ZZI_B : sve_crypto_binary_multi4<0b0000, "aese">; def AESD_4ZZI_B : sve_crypto_binary_multi4<0b0100, "aesd">; def AESEMC_4ZZI_B : sve_crypto_binary_multi4<0b1000, "aesemc">; - def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">; + def AESDIMC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">; // SVE_AES2 multi-vector polynomial multiply def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">; diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll new file mode 100644 index 0000000000000..5cbe27628e25d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sve-aes2 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sve-aes2,+ssve-aes -force-streaming < %s | FileCheck %s + +; +; AESE +; + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: aese_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: aese { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0] +; CHECK-NEXT: ret + %out = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: aese_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: aese { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.laneq.x4(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, + <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +; +; AESD +; + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: aesd_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: aesd { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: aesd_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: aesd { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.laneq.x4(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, <vscale x 
16 x i8> %c, + <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + + +; +; AESEMC +; + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: aesemc_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: aesemc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: aesemc_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: aesemc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.laneq.x4(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, + <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +; +; AESDIMC +; + +define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { +; CHECK-LABEL: aesdimc_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: aesdimc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) { +; CHECK-LABEL: aesdimc_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: aesdimc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0] +; CHECK-NEXT: ret + %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.laneq.x4(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, + <vscale x 16 x i8> %d, <vscale x 16 x i8> %c , i32 0) + ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %out +} + +; +; PMULL +; + +define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmull_i64(<vscale x 2 
x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: pmull_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: pmull { z0.q, z1.q }, z0.d, z1.d +; CHECK-NEXT: ret + %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out +} + + +; +; PMLAL +; + +define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmlal_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) { +; CHECK-LABEL: pmlal_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: pmlal { z0.q, z1.q }, z2.d, z3.d +; CHECK-NEXT: ret + %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, + <vscale x 2 x i64> %d) + ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out +} >From d86b9ebd30bfcb1a6f1dd8d644ea7db46eedcf7d Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Tue, 28 Oct 2025 14:50:50 +0000 Subject: [PATCH 2/3] Remove unnecessary code --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 2655aadb26c19..217fb2adffaac 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1973,7 +1973,6 @@ void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N, SDLoc DL(N); EVT VT = N->getValueType(0); - unsigned FirstVecIdx = HasPred ? 2 : 1; SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID SmallVector<SDValue, 4> Ops; >From 28de915b8a898e79b2f20d6af1ba0302c55a68ed Mon Sep 17 00:00:00 2001 From: Marian Lukac <[email protected]> Date: Fri, 7 Nov 2025 14:50:52 +0000 Subject: [PATCH 3/3] Address review comments --- clang/include/clang/Basic/arm_sve.td | 8 +-- .../sve2p1-intrinsics/acle_sve2p1_crypto.c | 68 +++++++++---------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 4 +- .../AArch64/sve2p1-intrinsics-crypto.ll | 4 +- 5 files changed, 44 insertions(+), 44 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 605cf93d196ff..5b104553602a7 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1852,10 +1852,10 @@ def SVAESIMC_X4 : SInst<"svaesdimc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "a def SVAESE_X4 : SInst<"svaese_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aese_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>; def SVAESeMC_X4 : SInst<"svaesemc_laneq[_{d}_x4]", "44di", "Uc", MergeNone, "aarch64_sve_aesemc_laneq_x4", [IsOverloadNone, VerifyRuntimeMode]>; -def SVPMULL_U64 : SInst<"svpmull[_u64]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>; -def SVPMULL_N_U64 : SInst<"svpmull[_n_u64]", "2da", "Ul", MergeNone, "aarch64_sve_pmull", [IsOverloadNone, VerifyRuntimeMode]>; -def SVPMLAL_U64 : SInst<"svpmlal[_u64]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>; -def SVPMLAL_N_U64 : SInst<"svpmlal[_n_u64]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal", [IsOverloadNone, VerifyRuntimeMode]>; +def SVPMULL_U64 : SInst<"svpmull[_u64_x2]", "2dd", "Ul", MergeNone, "aarch64_sve_pmull_x2", [IsOverloadNone, VerifyRuntimeMode]>; 
+def SVPMULL_N_U64 : SInst<"svpmull[_n_u64_x2]", "2da", "Ul", MergeNone, "aarch64_sve_pmull_x2", [IsOverloadNone, VerifyRuntimeMode]>; +def SVPMLAL_U64 : SInst<"svpmlal[_u64_x2]", "22dd", "Ul", MergeNone, "aarch64_sve_pmlal_x2", [IsOverloadNone, VerifyRuntimeMode]>; +def SVPMLAL_N_U64 : SInst<"svpmlal[_n_u64_x2]", "22da", "Ul", MergeNone, "aarch64_sve_pmlal_x2", [IsOverloadNone, VerifyRuntimeMode]>; } let SVETargetGuard = "sve-sha3", SMETargetGuard = "sme2p1,sve-sha3" in { diff --git a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c index bbe71d9db92cf..cea8f41285249 100644 --- a/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c +++ b/clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_crypto.c @@ -15,9 +15,9 @@ #ifdef SVE_OVERLOADED_FORMS // A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 #else -#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#define SVE_ACLE_FUNC(A1,A2) A1##A2 #endif #ifdef __ARM_FEATURE_SME @@ -38,7 +38,7 @@ // svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesd_laneq,_u8_x2)(op1, op2, 0); } // CHECK-LABEL: @test_svaesdimc_u8_x2( @@ -53,7 +53,7 @@ svuint8x2_t test_svaesd_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING // svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x2)(op1, op2, 0); } // CHECK-LABEL: @test_svaese_u8_x2( @@ -68,7 +68,7 @@ svuint8x2_t test_svaesdimc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING // svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaese_laneq,_u8_x2,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaese_laneq,_u8_x2)(op1, op2, 0); } // CHECK-LABEL: @test_svaesemc_u8_x2( @@ -83,7 +83,7 @@ svuint8x2_t test_svaese_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING // svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x2)(op1, op2, 0); } // CHECK-LABEL: @test_svaesd_u8_x4( @@ -98,7 +98,7 @@ svuint8x2_t test_svaesemc_u8_x2(svuint8x2_t op1, svuint8_t op2) STREAMING // svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesd_laneq,_u8_x4)(op1, op2, 0); } // CHECK-LABEL: @test_svaesdimc_u8_x4( @@ -113,7 +113,7 @@ svuint8x4_t test_svaesd_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING // svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesdimc_laneq,_u8_x4)(op1, op2, 0); } // CHECK-LABEL: @test_svaese_u8_x4( @@ -128,7 +128,7 @@ svuint8x4_t test_svaesdimc_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING // svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING { - return SVE_ACLE_FUNC(svaese_laneq,_u8_x4,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaese_laneq,_u8_x4)(op1, op2, 0); } // CHECK-LABEL: @test_svaesemc_u8_x4( @@ -143,73 +143,73 @@ svuint8x4_t test_svaese_u8_x4(svuint8x4_t op1, svuint8_t op2) STREAMING // svuint8x4_t test_svaesemc_u8_x4(svuint8x4_t op1, svuint8_t op2) 
STREAMING { - return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4,,)(op1, op2, 0); + return SVE_ACLE_FUNC(svaesemc_laneq,_u8_x4)(op1, op2, 0); } -// CHECK-LABEL: @test_svpmull_u64( +// CHECK-LABEL: @test_svpmull_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -// CPP-CHECK-LABEL: @_Z16test_svpmull_u64u12__SVUint64_tS_( +// CPP-CHECK-LABEL: @_Z19test_svpmull_u64_x2u12__SVUint64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -svuint64x2_t test_svpmull_u64(svuint64_t op1, svuint64_t op2) STREAMING +svuint64x2_t test_svpmull_u64_x2(svuint64_t op1, svuint64_t op2) STREAMING { - return SVE_ACLE_FUNC(svpmull,_u64,,)(op1, op2); + return SVE_ACLE_FUNC(svpmull,_u64_x2)(op1, op2); } -// CHECK-LABEL: @test_svpmull_n_u64( +// CHECK-LABEL: @test_svpmull_n_u64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -// CPP-CHECK-LABEL: @_Z18test_svpmull_n_u64u12__SVUint64_tm( +// CPP-CHECK-LABEL: @_Z21test_svpmull_n_u64_x2u12__SVUint64_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -svuint64x2_t test_svpmull_n_u64(svuint64_t op1, uint64_t op2) STREAMING +svuint64x2_t test_svpmull_n_u64_x2(svuint64_t op1, uint64_t op2) STREAMING { - return SVE_ACLE_FUNC(svpmull,_n_u64,,)(op1, op2); + return SVE_ACLE_FUNC(svpmull,_n_u64_x2)(op1, op2); } -// CHECK-LABEL: @test_svpmlal_u64( +// CHECK-LABEL: @test_svpmlal_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> 
} @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) // CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -// CPP-CHECK-LABEL: @_Z16test_svpmlal_u6412svuint64x2_tu12__SVUint64_tS0_( +// CPP-CHECK-LABEL: @_Z19test_svpmlal_u64_x212svuint64x2_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]]) // CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -svuint64x2_t test_svpmlal_u64(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING +svuint64x2_t test_svpmlal_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) STREAMING { - return SVE_ACLE_FUNC(svpmlal,_u64,,)(op1, op2, op3); + return SVE_ACLE_FUNC(svpmlal,_u64_x2)(op1, op2, op3); } -// CHECK-LABEL: @test_svpmlal_n_u64( +// CHECK-LABEL: @test_svpmlal_n_u64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // -// CPP-CHECK-LABEL: @_Z18test_svpmlal_n_u6412svuint64x2_tu12__SVUint64_tm( +// CPP-CHECK-LABEL: @_Z21test_svpmlal_n_u64_x212svuint64x2_tu12__SVUint64_tm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP3:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> [[OP1_COERCE0:%.*]], <vscale x 2 x i64> [[OP1_COERCE1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]] // 
-svuint64x2_t test_svpmlal_n_u64(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
+svuint64x2_t test_svpmlal_n_u64_x2(svuint64x2_t op1, svuint64_t op2, uint64_t op3) STREAMING
 {
-  return SVE_ACLE_FUNC(svpmlal,_n_u64,,)(op1, op2, op3);
+  return SVE_ACLE_FUNC(svpmlal,_n_u64_x2)(op1, op2, op3);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 71949c93f83b0..7fd30da6f5db9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -4216,9 +4216,9 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_aese_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
   def int_aarch64_sve_aesemc_laneq_x4 : SVE2_Crypto_LANE_X4_Intrinsic;
 
-  def int_aarch64_sve_pmull : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
+  def int_aarch64_sve_pmull_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
                                 [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
-  def int_aarch64_sve_pmlal : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
+  def int_aarch64_sve_pmlal_x2 : DefaultAttrsIntrinsic<[llvm_nxv2i64_ty, llvm_nxv2i64_ty],
                                 [llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 217fb2adffaac..b8b5431989997 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6273,10 +6273,10 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
   case Intrinsic::aarch64_sve_aesdimc_laneq_x4:
     SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
     return;
-  case Intrinsic::aarch64_sve_pmlal:
+  case Intrinsic::aarch64_sve_pmlal_x2:
     SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
     return;
-  case Intrinsic::aarch64_sve_pmull: {
+  case Intrinsic::aarch64_sve_pmull_x2: {
     SDLoc DL(Node);
     SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
     SDNode *Res =
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
index 5cbe27628e25d..09cca404a226a 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
@@ -132,7 +132,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmull_i64(<vscale x 2 x i64>
 ; CHECK: // %bb.0:
 ; CHECK-NEXT:    pmull { z0.q, z1.q }, z0.d, z1.d
 ; CHECK-NEXT:    ret
-  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull(<vscale x 2 x i64> %a,
+  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmull.x2(<vscale x 2 x i64> %a,
                                                                                   <vscale x 2 x i64> %b)
   ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out
 }
@@ -149,7 +149,7 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmlal_i64(<vscale x 2 x i64>
 ; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    pmlal { z0.q, z1.q }, z2.d, z3.d
 ; CHECK-NEXT:    ret
-  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,
+  %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.x2(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,
                                                                                     <vscale x 2 x i64> %d)
   ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %out
 }
