https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/124239
>From 2ce077b011a9dcac0f9649493a50819971695b73 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Tue, 16 Jul 2024 16:08:16 +0800 Subject: [PATCH 1/2] [RISCV][MC] Support Zvabd instructions Support for these instructions is added: - Vector Single-Width Signed/Unsigned Integer Absolute Difference - Vector Widening Signed/Unsigned Integer Absolute Difference and Accumulate Doc: https://bytedance.larkoffice.com/docx/DqaLdNqNao8WgZxgUJkcqIVPn7g --- .../Driver/print-supported-extensions-riscv.c | 1 + .../test/Preprocessor/riscv-target-features.c | 9 ++ llvm/lib/Target/RISCV/RISCVFeatures.td | 6 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 11 +- llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td | 25 +++++ llvm/test/CodeGen/RISCV/attributes.ll | 4 + llvm/test/MC/RISCV/rvv/zvabd-invalid.s | 18 +++ llvm/test/MC/RISCV/rvv/zvabd.s | 105 ++++++++++++++++++ .../TargetParser/RISCVISAInfoTest.cpp | 1 + 10 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td create mode 100644 llvm/test/MC/RISCV/rvv/zvabd-invalid.s create mode 100644 llvm/test/MC/RISCV/rvv/zvabd.s diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index ae3a1c29df3976..6c9ee75390fa88 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -183,6 +183,7 @@ // CHECK-NEXT: zicfilp 1.0 'Zicfilp' (Landing pad) // CHECK-NEXT: zicfiss 1.0 'Zicfiss' (Shadow stack) // CHECK-NEXT: zalasr 0.1 'Zalasr' (Load-Acquire and Store-Release Instructions) +// CHECK-NEXT: zvabd 0.2 'Zvabd' (Vector Absolute Difference) // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) // CHECK-NEXT: sdext 1.0 'Sdext' (External debugger) diff --git 
a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index c2197711352757..2725c283f107d1 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -145,6 +145,7 @@ // CHECK-NOT: __riscv_zksh {{.*$}} // CHECK-NOT: __riscv_zkt {{.*$}} // CHECK-NOT: __riscv_zmmul {{.*$}} +// CHECK-NOT: __riscv_zvabd {{.*$}} // CHECK-NOT: __riscv_zvbb {{.*$}} // CHECK-NOT: __riscv_zvbc {{.*$}} // CHECK-NOT: __riscv_zve32f {{.*$}} @@ -1504,6 +1505,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZFA-EXT %s // CHECK-ZFA-EXT: __riscv_zfa 1000000{{$}} +// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: -march=rv32i_zve64x_zvabd0p2 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZVABD-EXT %s +// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: -march=rv64i_zve64x_zvabd0p2 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZVABD-EXT %s +// CHECK-ZVABD-EXT: __riscv_zvabd 2000{{$}} + // RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_zve64x_zvbb1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZVBB-EXT %s diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 4119dd77804f1a..0937f378ca3d14 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -646,6 +646,12 @@ def FeatureStdExtV [FeatureStdExtZvl128b, FeatureStdExtZve64d]>, RISCVExtensionBitmask<0, 21>; +def FeatureStdExtZvabd + : RISCVExperimentalExtension<0, 2, "Vector Absolute Difference">; +def HasStdExtZvabd : Predicate<"Subtarget->hasStdExtZvabd()">, + AssemblerPredicate<(all_of FeatureStdExtZvabd), + "'Zvabd' (Vector Absolute Difference)">; + def FeatureStdExtZvfbfmin : RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>; def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index bb5bb6352c32a5..0e0a05e8fc03b1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2110,6 +2110,7 @@ include "RISCVInstrInfoZk.td" // Vector include "RISCVInstrInfoV.td" include "RISCVInstrInfoZvk.td" +include "RISCVInstrInfoZvabd.td" // Compressed include "RISCVInstrInfoC.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 24a881dc6810f8..0d06efe6a488af 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -575,15 +575,16 @@ multiclass VALU_IV_X<string opcodestr, bits<6> funct6> { SchedBinaryMC<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX">; } -multiclass VALU_IV_I<string opcodestr, bits<6> funct6> { - def I : VALUVI<funct6, opcodestr # ".vi">, - SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; +multiclass VALU_IV_I<string opcodestr, bits<6> funct6, Operand optype = simm5> { + def I : VALUVI<funct6, opcodestr#".vi", optype>, + SchedUnaryMC<"WriteVIALUI", "ReadVIALUV">; } -multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6> +multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, + Operand optype = simm5> : VALU_IV_V<opcodestr, funct6>, VALU_IV_X<opcodestr, funct6>, - VALU_IV_I<opcodestr, funct6>; + VALU_IV_I<opcodestr, funct6, optype>; multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6> : VALU_IV_V<opcodestr, funct6>, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td new file mode 100644 index 00000000000000..7a8f79ccfd465b --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td @@ -0,0 +1,25 @@ +//===-- RISCVInstrInfoZvabd.td - 'Zvabd' instructions ------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file describes the RISC-V instructions for 'Zvabd' (Vector Absolute +/// Difference). +/// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Definitions +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtZvabd] in { + defm VABD_V : VAALU_MV_V_X<"vabd", 0b010001>; + defm VABDU_V : VAALU_MV_V_X<"vabdu", 0b010011>; + + let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { + defm VWABDACC_V : VALU_MV_V_X<"vwabdacc", 0b010101, "v">; + defm VWABDACCU_V : VALU_MV_V_X<"vwabdaccu", 0b010110, "v">; + } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV +} // Predicates = [HasStdExtZvabd] diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index caed0bdfb04984..a36d8c16a318e0 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -121,6 +121,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+zvksh %s -o - | FileCheck --check-prefix=RV32ZVKSH %s ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV32ZVKT %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV32ZVFH %s +; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvabd %s -o - | FileCheck --check-prefix=RV32ZVABD %s ; RUN: llc -mtriple=riscv32 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV32ZICOND %s ; RUN: llc -mtriple=riscv32 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV32ZIMOP %s ; RUN: llc -mtriple=riscv32 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV32ZCMOP %s @@ -270,6 +271,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+zvksh %s -o - | FileCheck 
--check-prefix=RV64ZVKSH %s ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV64ZVKT %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV64ZVFH %s +; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvabd %s -o - | FileCheck --check-prefix=RV64ZVABD %s ; RUN: llc -mtriple=riscv64 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV64ZICOND %s ; RUN: llc -mtriple=riscv64 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV64ZIMOP %s ; RUN: llc -mtriple=riscv64 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV64ZCMOP %s @@ -437,6 +439,7 @@ ; RV32ZVKSH: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0" ; RV32ZVKT: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0" ; RV32ZVFH: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0" +; RV32ZVABD: .attribute 5, "rv32i2p1_zicsr2p0_zvabd0p2_zve32x1p0_zvl32b1p0" ; RV32ZICOND: .attribute 5, "rv32i2p1_zicond1p0" ; RV32ZIMOP: .attribute 5, "rv32i2p1_zimop1p0" ; RV32ZCMOP: .attribute 5, "rv32i2p1_zca1p0_zcmop1p0" @@ -584,6 +587,7 @@ ; RV64ZVKSH: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvksh1p0_zvl32b1p0" ; RV64ZVKT: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0" ; RV64ZVFH: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0" +; RV64ZVABD: .attribute 5, "rv64i2p1_zicsr2p0_zvabd0p2_zve32x1p0_zvl32b1p0" ; RV64ZICOND: .attribute 5, "rv64i2p1_zicond1p0" ; RV64ZIMOP: .attribute 5, "rv64i2p1_zimop1p0" ; RV64ZCMOP: .attribute 5, "rv64i2p1_zca1p0_zcmop1p0" diff --git a/llvm/test/MC/RISCV/rvv/zvabd-invalid.s b/llvm/test/MC/RISCV/rvv/zvabd-invalid.s new file mode 100644 index 00000000000000..da9184364020ab --- /dev/null +++ b/llvm/test/MC/RISCV/rvv/zvabd-invalid.s @@ -0,0 +1,18 @@ +# RUN: not llvm-mc -triple=riscv64 --mattr=+zve64x --mattr=+experimental-zvabd %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + 
+vwabdacc.vv v9, v9, v8 +# CHECK-ERROR: [[@LINE-1]]:13: error: the destination vector register group cannot overlap the source vector register group +# CHECK-ERROR-LABEL: vwabdacc.vv v9, v9, v8 + +vwabdacc.vx v9, v9, a0 +# CHECK-ERROR: [[@LINE-1]]:13: error: the destination vector register group cannot overlap the source vector register group +# CHECK-ERROR-LABEL: vwabdacc.vx v9, v9, a0 + +vwabdaccu.vv v9, v9, v8 +# CHECK-ERROR: [[@LINE-1]]:14: error: the destination vector register group cannot overlap the source vector register group +# CHECK-ERROR-LABEL: vwabdaccu.vv v9, v9, v8 + +vwabdaccu.vx v9, v9, a0 +# CHECK-ERROR: [[@LINE-1]]:14: error: the destination vector register group cannot overlap the source vector register group +# CHECK-ERROR-LABEL: vwabdaccu.vx v9, v9, a0 diff --git a/llvm/test/MC/RISCV/rvv/zvabd.s b/llvm/test/MC/RISCV/rvv/zvabd.s new file mode 100644 index 00000000000000..d765e01c52081b --- /dev/null +++ b/llvm/test/MC/RISCV/rvv/zvabd.s @@ -0,0 +1,105 @@ +# RUN: llvm-mc -triple=riscv32 -show-encoding --mattr=+v --mattr=+experimental-zvabd %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv32 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+v --mattr=+experimental-zvabd %s \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+experimental-zvabd --no-print-imm-hex - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv32 -filetype=obj --mattr=+v --mattr=+experimental-zvabd %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +vabd.vv v10, v9, v8 +# CHECK-INST: vabd.vv v10, v9, v8 +# CHECK-ENCODING: [0x57,0x25,0x94,0x46] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 46942557 <unknown> + +vabd.vv v10, v9, v8, v0.t +# CHECK-INST: vabd.vv v10, v9, v8, v0.t +# CHECK-ENCODING: [0x57,0x25,0x94,0x44] +# 
CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 44942557 <unknown> + +vabd.vx v10, v9, a0 +# CHECK-INST: vabd.vx v10, v9, a0 +# CHECK-ENCODING: [0x57,0x65,0x95,0x46] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 46956557 <unknown> + +vabd.vx v10, v9, a0, v0.t +# CHECK-INST: vabd.vx v10, v9, a0, v0.t +# CHECK-ENCODING: [0x57,0x65,0x95,0x44] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 44956557 <unknown> + +vabdu.vv v10, v9, v8 +# CHECK-INST: vabdu.vv v10, v9, v8 +# CHECK-ENCODING: [0x57,0x25,0x94,0x4e] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 4e942557 <unknown> + +vabdu.vv v10, v9, v8, v0.t +# CHECK-INST: vabdu.vv v10, v9, v8, v0.t +# CHECK-ENCODING: [0x57,0x25,0x94,0x4c] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 4c942557 <unknown> + +vabdu.vx v10, v9, a0 +# CHECK-INST: vabdu.vx v10, v9, a0 +# CHECK-ENCODING: [0x57,0x65,0x95,0x4e] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 4e956557 <unknown> + +vabdu.vx v10, v9, a0, v0.t +# CHECK-INST: vabdu.vx v10, v9, a0, v0.t +# CHECK-ENCODING: [0x57,0x65,0x95,0x4c] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 4c956557 <unknown> + +vwabdacc.vv v10, v9, v8 +# CHECK-INST: vwabdacc.vv v10, v9, v8 +# CHECK-ENCODING: [0x57,0x25,0x94,0x56] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 56942557 <unknown> + +vwabdacc.vv v10, v9, v8, v0.t +# CHECK-INST: vwabdacc.vv v10, v9, v8, v0.t +# CHECK-ENCODING: [0x57,0x25,0x94,0x54] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute 
Difference){{$}} +# CHECK-UNKNOWN: 54942557 <unknown> + +vwabdacc.vx v10, v9, a0 +# CHECK-INST: vwabdacc.vx v10, v9, a0 +# CHECK-ENCODING: [0x57,0x65,0x95,0x56] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 56956557 <unknown> + +vwabdacc.vx v10, v9, a0, v0.t +# CHECK-INST: vwabdacc.vx v10, v9, a0, v0.t +# CHECK-ENCODING: [0x57,0x65,0x95,0x54] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 54956557 <unknown> + +vwabdaccu.vv v10, v9, v8 +# CHECK-INST: vwabdaccu.vv v10, v9, v8 +# CHECK-ENCODING: [0x57,0x25,0x94,0x5a] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 5a942557 <unknown> + +vwabdaccu.vv v10, v9, v8, v0.t +# CHECK-INST: vwabdaccu.vv v10, v9, v8, v0.t +# CHECK-ENCODING: [0x57,0x25,0x94,0x58] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 58942557 <unknown> + +vwabdaccu.vx v10, v9, a0 +# CHECK-INST: vwabdaccu.vx v10, v9, a0 +# CHECK-ENCODING: [0x57,0x65,0x95,0x5a] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 5a956557 <unknown> + +vwabdaccu.vx v10, v9, a0, v0.t +# CHECK-INST: vwabdaccu.vx v10, v9, a0, v0.t +# CHECK-ENCODING: [0x57,0x65,0x95,0x58] +# CHECK-ERROR: instruction requires the following: 'Zvabd' (Vector Absolute Difference){{$}} +# CHECK-UNKNOWN: 58956557 <unknown> diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 14a60c1857f24f..c279790a80d594 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1109,6 +1109,7 @@ Experimental extensions zicfilp 1.0 This is a long dummy description zicfiss 1.0 zalasr 0.1 + zvabd 0.2 zvbc32e 0.7 zvkgs 0.7 sdext 1.0 >From b2cbf11fec3dd9b5d37b4c49eb159afd14f29fcf 
Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Fri, 24 Jan 2025 15:55:53 +0800 Subject: [PATCH 2/2] [RISCV][CodeGen] Lowering abds/abdu to Zvabd instructions We directly lower `ISD::ABDS`/`ISD::ABDU` to Zvabd instructions. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 17 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 10 +- llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td | 22 ++ llvm/test/CodeGen/RISCV/rvv/abd.ll | 132 ++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-abd.ll | 284 ++++++++++++++++++ .../CodeGen/RISCV/rvv/fixed-vectors-sad.ll | 83 +++++ 7 files changed, 544 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 618fb28d3e9f9a..0bfcf21351b465 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -830,7 +831,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT, Legal); - setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom); + if (Subtarget.hasStdExtZvabd()) + setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Legal); + else + setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom); // Custom-lower extensions and truncations from/to mask types. 
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, @@ -6400,6 +6404,8 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SMAX) OP_CASE(UMIN) OP_CASE(UMAX) + OP_CASE(ABDS) + OP_CASE(ABDU) OP_CASE(STRICT_FADD) OP_CASE(STRICT_FSUB) OP_CASE(STRICT_FMUL) @@ -6502,7 +6508,7 @@ static bool hasPassthruOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 129 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL) @@ -6526,7 +6532,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert( - RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 && + RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 129 && RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 && "adding target specific op should update this function"); if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL) @@ -7530,6 +7536,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerToScalableOp(Op, DAG); case ISD::ABDS: case ISD::ABDU: { + if (Subtarget.hasStdExtZvabd()) + return lowerToScalableOp(Op, DAG); + SDLoc dl(Op); EVT VT = Op->getValueType(0); SDValue LHS = DAG.getFreeze(Op->getOperand(0)); @@ -21020,6 +21029,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VZEXT_VL) NODE_NAME_CASE(VCPOP_VL) NODE_NAME_CASE(VFIRST_VL) + NODE_NAME_CASE(ABDS_VL) + NODE_NAME_CASE(ABDU_VL) NODE_NAME_CASE(READ_CSR) NODE_NAME_CASE(WRITE_CSR) NODE_NAME_CASE(SWAP_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h 
index 77605a3076a80a..ed2244be25eb74 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -282,6 +282,10 @@ enum NodeType : unsigned { UMIN_VL, UMAX_VL, + // Vector Absolute Difference. + ABDS_VL, + ABDU_VL, + BITREVERSE_VL, BSWAP_VL, CTLZ_VL, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 268bfe70673a2a..4bf5ba1edea801 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -2850,11 +2850,11 @@ multiclass VPseudoVFRDIV_VF_RM { } } -multiclass VPseudoVALU_VV_VX { - foreach m = MxList in { - defm "" : VPseudoBinaryV_VV<m>, - SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX, - forcePassthruRead=true>; +multiclass VPseudoVALU_VV_VX<bit Commutable = 0> { + foreach m = MxList in { + defm "" : VPseudoBinaryV_VV<m, Commutable = Commutable>, + SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX, + forcePassthruRead = true>; defm "" : VPseudoBinaryV_VX<m>, SchedBinary<"WriteVIALUX", "ReadVIALUV", "ReadVIALUX", m.MX, forcePassthruRead=true>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td index 7a8f79ccfd465b..6adc28f89b456a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvabd.td @@ -23,3 +23,25 @@ let Predicates = [HasStdExtZvabd] in { defm VWABDACCU_V : VALU_MV_V_X<"vwabdaccu", 0b010110, "v">; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV } // Predicates = [HasStdExtZvabd] + +//===----------------------------------------------------------------------===// +// Pseudos +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtZvabd] in { + defm PseudoVABD : VPseudoVALU_VV_VX<Commutable = 1>; + defm PseudoVABDU : VPseudoVALU_VV_VX<Commutable = 1>; +} // Predicates = [HasStdExtZvabd] + 
+//===----------------------------------------------------------------------===// +// CodeGen Patterns +//===----------------------------------------------------------------------===// +def riscv_abds_vl + : SDNode<"RISCVISD::ABDS_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_abdu_vl + : SDNode<"RISCVISD::ABDU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; + +defm : VPatBinarySDNode_VV_VX<abds, "PseudoVABD">; +defm : VPatBinarySDNode_VV_VX<abdu, "PseudoVABDU">; + +defm : VPatBinaryVL_VV_VX<riscv_abds_vl, "PseudoVABD">; +defm : VPatBinaryVL_VV_VX<riscv_abdu_vl, "PseudoVABDU">; diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll index 5e610c453e1bac..249a405c3470c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV64 ; ; SABD @@ -14,6 +16,12 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16> %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16> %sub = sub <vscale x 16 x i16> %a.sext, %b.sext @@ -30,6 +38,14 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, 
<vscale x ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_b_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; ZVABD-NEXT: vmxor.mm v0, v0, v8 +; ZVABD-NEXT: vmv.v.i v8, 0 +; ZVABD-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8> %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8> %sub = sub <vscale x 16 x i8> %a.sext, %b.sext @@ -45,6 +61,12 @@ define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32> %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32> %sub = sub <vscale x 8 x i32> %a.sext, %b.sext @@ -63,6 +85,14 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVABD-NEXT: vabd.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16> %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16> %sub = sub <vscale x 8 x i16> %a.sext, %b.sext @@ -78,6 +108,12 @@ define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64> %b.sext 
= sext <vscale x 4 x i32> %b to <vscale x 4 x i64> %sub = sub <vscale x 4 x i64> %a.sext, %b.sext @@ -96,6 +132,14 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVABD-NEXT: vabd.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32> %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32> %sub = sub <vscale x 4 x i32> %a.sext, %b.sext @@ -123,6 +167,14 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_d_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVABD-NEXT: vabd.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64> %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64> %sub = sub <vscale x 2 x i64> %a.sext, %b.sext @@ -142,6 +194,12 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16> %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16> %sub = sub <vscale x 16 x i16> %a.zext, %b.zext @@ -158,6 +216,14 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; 
CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_b_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; ZVABD-NEXT: vmxor.mm v0, v0, v8 +; ZVABD-NEXT: vmv.v.i v8, 0 +; ZVABD-NEXT: vmerge.vim v8, v8, 1, v0 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8> %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8> %sub = sub <vscale x 16 x i8> %a.zext, %b.zext @@ -173,6 +239,12 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32> %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32> %sub = sub <vscale x 8 x i32> %a.zext, %b.zext @@ -191,6 +263,14 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16> %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16> %sub = sub <vscale x 8 x i16> %a.zext, %b.zext @@ -206,6 +286,12 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64> %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64> %sub = sub <vscale 
x 4 x i64> %a.zext, %b.zext @@ -224,6 +310,14 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32> %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32> %sub = sub <vscale x 4 x i32> %a.zext, %b.zext @@ -251,6 +345,14 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_d_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v10, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64> %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64> %sub = sub <vscale x 2 x i64> %a.zext, %b.zext @@ -269,6 +371,13 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v ; CHECK-NEXT: vmaxu.vv v8, v8, v12 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_non_matching_extension: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVABD-NEXT: vzext.vf4 v12, v10 +; ZVABD-NEXT: vabdu.vv v8, v8, v12 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64> %b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64> %sub = sub <vscale x 4 x i64> %a.zext, %b.zext @@ -290,6 +399,15 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a, ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v10 ; 
CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_non_matching_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v10, v8 +; ZVABD-NEXT: vabdu.vv v10, v10, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v10 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32> %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32> %sub = sub <vscale x 4 x i32> %a.zext, %b.zext @@ -311,6 +429,18 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs ; CHECK-NEXT: vrsub.vi v8, v10, 0 ; CHECK-NEXT: vmax.vv v8, v10, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_non_matching_promotion: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; ZVABD-NEXT: vzext.vf4 v10, v8 +; ZVABD-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVABD-NEXT: vsext.vf2 v8, v9 +; ZVABD-NEXT: vwsub.wv v10, v10, v8 +; ZVABD-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVABD-NEXT: vrsub.vi v8, v10, 0 +; ZVABD-NEXT: vmax.vv v8, v10, v8 +; ZVABD-NEXT: ret %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32> %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32> %sub = sub <vscale x 4 x i32> %a.zext, %b.zext @@ -333,3 +463,5 @@ declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1) ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; RV32: {{.*}} ; RV64: {{.*}} +; ZVABD-RV32: {{.*}} +; ZVABD-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll index bd1209a17b5345..07bdb805ba46be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abd.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+experimental-zvabd -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVABD,ZVABD-RV64 ; ; SABD ; @@ -14,6 +16,12 @@ define <8 x i8> @sabd_8b_as_16b(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_8b_as_16b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <8 x i8> %a to <8 x i16> %b.sext = sext <8 x i8> %b to <8 x i16> %sub = sub <8 x i16> %a.sext, %b.sext @@ -31,6 +39,12 @@ define <8 x i8> @sabd_8b_as_32b(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_8b_as_32b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <8 x i8> %a to <8 x i32> %b.sext = sext <8 x i8> %b to <8 x i32> %sub = sub <8 x i32> %a.sext, %b.sext @@ -48,6 +62,12 @@ define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ;
CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_16b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <16 x i8> %a to <16 x i16> %b.sext = sext <16 x i8> %b to <16 x i16> %sub = sub <16 x i16> %a.sext, %b.sext @@ -65,6 +85,12 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_4h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <4 x i16> %a to <4 x i32> %b.sext = sext <4 x i16> %b to <4 x i32> %sub = sub <4 x i32> %a.sext, %b.sext @@ -84,6 +110,14 @@ define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_4h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVABD-NEXT: vabd.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <4 x i8> %a to <4 x i16> %b.sext = sext <4 x i8> %b to <4 x i16> %sub = sub <4 x i16> %a.sext, %b.sext @@ -100,6 +134,12 @@ define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_8h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <8 x i16> %a to <8 x i32> %b.sext = sext <8 x i16> %b to <8 x i32> %sub = sub <8 x i32> %a.sext, %b.sext @@ -119,6 +159,14 @@ define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_8h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; 
ZVABD-NEXT: vabd.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <8 x i8> %a to <8 x i16> %b.sext = sext <8 x i8> %b to <8 x i16> %sub = sub <8 x i16> %a.sext, %b.sext @@ -135,6 +183,12 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_2s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <2 x i32> %a to <2 x i64> %b.sext = sext <2 x i32> %b to <2 x i64> %sub = sub <2 x i64> %a.sext, %b.sext @@ -154,6 +208,14 @@ define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_2s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVABD-NEXT: vabd.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <2 x i16> %a to <2 x i32> %b.sext = sext <2 x i16> %b to <2 x i32> %sub = sub <2 x i32> %a.sext, %b.sext @@ -170,6 +232,12 @@ define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_4s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <4 x i32> %a to <4 x i64> %b.sext = sext <4 x i32> %b to <4 x i64> %sub = sub <4 x i64> %a.sext, %b.sext @@ -189,6 +257,14 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_4s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v9, 
v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <4 x i16> %a to <4 x i32> %b.sext = sext <4 x i16> %b to <4 x i32> %sub = sub <4 x i32> %a.sext, %b.sext @@ -204,6 +280,12 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_2d: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <2 x i64> %a to <2 x i128> %b.sext = sext <2 x i64> %b to <2 x i128> %sub = sub <2 x i128> %a.sext, %b.sext @@ -223,6 +305,14 @@ define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_2d_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVABD-NEXT: vabd.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.sext = sext <2 x i32> %a to <2 x i64> %b.sext = sext <2 x i32> %b to <2 x i64> %sub = sub <2 x i64> %a.sext, %b.sext @@ -243,6 +333,12 @@ define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_8b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <8 x i8> %a to <8 x i16> %b.zext = zext <8 x i8> %b to <8 x i16> %sub = sub <8 x i16> %a.zext, %b.zext @@ -260,6 +356,12 @@ define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_16b: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <16 x i8> %a to <16 x 
i16> %b.zext = zext <16 x i8> %b to <16 x i16> %sub = sub <16 x i16> %a.zext, %b.zext @@ -277,6 +379,12 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_4h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <4 x i16> %a to <4 x i32> %b.zext = zext <4 x i16> %b to <4 x i32> %sub = sub <4 x i32> %a.zext, %b.zext @@ -296,6 +404,14 @@ define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_4h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVABD-NEXT: vabdu.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <4 x i8> %a to <4 x i16> %b.zext = zext <4 x i8> %b to <4 x i16> %sub = sub <4 x i16> %a.zext, %b.zext @@ -312,6 +428,12 @@ define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_8h: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <8 x i16> %a to <8 x i32> %b.zext = zext <8 x i16> %b to <8 x i32> %sub = sub <8 x i32> %a.zext, %b.zext @@ -331,6 +453,14 @@ define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_8h_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <8 x i8> %a to <8 x i16> %b.zext = zext <8 x 
i8> %b to <8 x i16> %sub = sub <8 x i16> %a.zext, %b.zext @@ -347,6 +477,12 @@ define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_2s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <2 x i32> %a to <2 x i64> %b.zext = zext <2 x i32> %b to <2 x i64> %sub = sub <2 x i64> %a.zext, %b.zext @@ -366,6 +502,14 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_2s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVABD-NEXT: vabdu.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <2 x i16> %a to <2 x i32> %b.zext = zext <2 x i16> %b to <2 x i32> %sub = sub <2 x i32> %a.zext, %b.zext @@ -382,6 +526,12 @@ define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_4s: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <4 x i32> %a to <4 x i64> %b.zext = zext <4 x i32> %b to <4 x i64> %sub = sub <4 x i64> %a.zext, %b.zext @@ -401,6 +551,14 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_4s_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <4 x i16> %a to <4 x i32> %b.zext = zext <4 x i16> %b to <4 x 
i32> %sub = sub <4 x i32> %a.zext, %b.zext @@ -416,6 +574,12 @@ define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_2d: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <2 x i64> %a to <2 x i128> %b.zext = zext <2 x i64> %b to <2 x i128> %sub = sub <2 x i128> %a.zext, %b.zext @@ -435,6 +599,14 @@ define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vzext.vf2 v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_2d_promoted_ops: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; ZVABD-NEXT: vabdu.vv v9, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; ZVABD-NEXT: vzext.vf2 v8, v9 +; ZVABD-NEXT: ret %a.zext = zext <2 x i32> %a to <2 x i64> %b.zext = zext <2 x i32> %b to <2 x i64> %sub = sub <2 x i64> %a.zext, %b.zext @@ -451,6 +623,14 @@ define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_v16i8_nuw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vsub.vv v8, v8, v9 +; ZVABD-NEXT: vrsub.vi v9, v8, 0 +; ZVABD-NEXT: vmax.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nuw <16 x i8> %a, %b %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true) ret <16 x i8> %abs @@ -465,6 +645,14 @@ define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_v8i16_nuw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vsub.vv v8, v8, v9 +; ZVABD-NEXT: vrsub.vi v9, v8, 0 +; ZVABD-NEXT: vmax.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nuw <8 x i16> %a, %b %abs = call <8 x i16> 
@llvm.abs.v8i16(<8 x i16> %sub, i1 true) ret <8 x i16> %abs @@ -479,6 +667,14 @@ define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_v4i32_nuw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vsub.vv v8, v8, v9 +; ZVABD-NEXT: vrsub.vi v9, v8, 0 +; ZVABD-NEXT: vmax.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nuw <4 x i32> %a, %b %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true) ret <4 x i32> %abs @@ -493,6 +689,14 @@ define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: uabd_v2i64_nuw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vsub.vv v8, v8, v9 +; ZVABD-NEXT: vrsub.vi v9, v8, 0 +; ZVABD-NEXT: vmax.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nuw <2 x i64> %a, %b %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true) ret <2 x i64> %abs @@ -507,6 +711,12 @@ define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_v16i8_nsw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nsw <16 x i8> %a, %b %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true) ret <16 x i8> %abs @@ -521,6 +731,12 @@ define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_v8i16_nsw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nsw <8 x i16> %a, %b %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true) ret <8 x i16> %abs @@ -535,6 +751,12 @@ define <4 x i32> 
@sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_v4i32_nsw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nsw <4 x i32> %a, %b %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true) ret <4 x i32> %abs @@ -549,6 +771,12 @@ define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sabd_v2i64_nsw: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %sub = sub nsw <2 x i64> %a, %b %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true) ret <2 x i64> %abs @@ -563,6 +791,12 @@ define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: smaxmin_v16i8: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1) %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1) %sub = sub <16 x i8> %a, %b @@ -578,6 +812,12 @@ define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: smaxmin_v8i16: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1) %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1) %sub = sub <8 x i16> %a, %b @@ -593,6 +833,12 @@ define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; 
ZVABD-LABEL: smaxmin_v4i32: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1) %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1) %sub = sub <4 x i32> %a, %b @@ -608,6 +854,12 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) { ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: smaxmin_v2i64: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vabd.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1) %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1) %sub = sub <2 x i64> %a, %b @@ -623,6 +875,12 @@ define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: umaxmin_v16i8: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1) %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1) %sub = sub <16 x i8> %a, %b @@ -638,6 +896,12 @@ define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: umaxmin_v8i16: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1) %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1) %sub = sub <8 x i16> %a, %b @@ -653,6 +917,12 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: umaxmin_v4i32: +; 
ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1) %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1) %sub = sub <4 x i32> %a, %b @@ -668,6 +938,12 @@ define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: umaxmin_v2i64: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1) %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1) %sub = sub <2 x i64> %a, %b @@ -683,6 +959,12 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) { ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: umaxmin_v16i8_com1: +; ZVABD: # %bb.0: +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: ret %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1) %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0) %sub = sub <16 x i8> %a, %b @@ -725,3 +1007,5 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; RV32: {{.*}} ; RV64: {{.*}} +; ZVABD-RV32: {{.*}} +; ZVABD-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll index 8da605d35270de..62ec0543949a0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sad.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+experimental-zvabd | FileCheck %s --check-prefix=ZVABD +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+experimental-zvabd | FileCheck %s --check-prefix=ZVABD define signext i16 @sad_4x8_as_i16(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: sad_4x8_as_i16: @@ -16,6 +18,18 @@ define signext i16 @sad_4x8_as_i16(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sad_4x8_as_i16: +; ZVABD: # %bb.0: # %entry +; ZVABD-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVABD-NEXT: vmv.s.x v9, zero +; ZVABD-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; ZVABD-NEXT: vwredsumu.vs v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVABD-NEXT: vmv.x.s a0, v8 +; ZVABD-NEXT: ret entry: %1 = zext <4 x i8> %a to <4 x i16> %3 = zext <4 x i8> %b to <4 x i16> @@ -38,6 +52,17 @@ define signext i32 @sad_4x8_as_i32(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vredsum.vs v8, v9, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sad_4x8_as_i32: +; ZVABD: # %bb.0: # %entry +; ZVABD-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVABD-NEXT: vzext.vf4 v9, v8 +; ZVABD-NEXT: vmv.s.x 
v8, zero +; ZVABD-NEXT: vredsum.vs v8, v9, v8 +; ZVABD-NEXT: vmv.x.s a0, v8 +; ZVABD-NEXT: ret entry: %1 = zext <4 x i8> %a to <4 x i32> %3 = zext <4 x i8> %b to <4 x i32> @@ -61,6 +86,18 @@ define signext i16 @sad_16x8_as_i16(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sad_16x8_as_i16: +; ZVABD: # %bb.0: # %entry +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vmv.s.x v9, zero +; ZVABD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; ZVABD-NEXT: vwredsumu.vs v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vmv.x.s a0, v8 +; ZVABD-NEXT: ret entry: %1 = zext <16 x i8> %a to <16 x i16> %3 = zext <16 x i8> %b to <16 x i16> @@ -83,6 +120,17 @@ define signext i32 @sad_16x8_as_i32(<16 x i8> %a, <16 x i8> %b) { ; CHECK-NEXT: vredsum.vs v8, v12, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sad_16x8_as_i32: +; ZVABD: # %bb.0: # %entry +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVABD-NEXT: vzext.vf4 v12, v8 +; ZVABD-NEXT: vmv.s.x v8, zero +; ZVABD-NEXT: vredsum.vs v8, v12, v8 +; ZVABD-NEXT: vmv.x.s a0, v8 +; ZVABD-NEXT: ret entry: %1 = zext <16 x i8> %a to <16 x i32> %3 = zext <16 x i8> %b to <16 x i32> @@ -135,6 +183,41 @@ define signext i32 @sad_2block_16xi8_as_i32(ptr %a, ptr %b, i32 signext %stridea ; CHECK-NEXT: vredsum.vs v8, v20, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret +; +; ZVABD-LABEL: sad_2block_16xi8_as_i32: +; ZVABD: # %bb.0: # %entry +; ZVABD-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVABD-NEXT: vle8.v v8, (a0) +; ZVABD-NEXT: vle8.v v9, (a1) +; ZVABD-NEXT: add a0, a0, a2 +; ZVABD-NEXT: add a1, a1, a3 +; ZVABD-NEXT: vle8.v v10, (a0) +; ZVABD-NEXT: vle8.v v11, (a1) +; ZVABD-NEXT: add a0, a0, a2 +; ZVABD-NEXT: add 
a1, a1, a3 +; ZVABD-NEXT: vle8.v v12, (a0) +; ZVABD-NEXT: vle8.v v13, (a1) +; ZVABD-NEXT: add a0, a0, a2 +; ZVABD-NEXT: add a1, a1, a3 +; ZVABD-NEXT: vabdu.vv v8, v8, v9 +; ZVABD-NEXT: vle8.v v9, (a0) +; ZVABD-NEXT: vabdu.vv v10, v10, v11 +; ZVABD-NEXT: vle8.v v11, (a1) +; ZVABD-NEXT: vwaddu.vv v14, v10, v8 +; ZVABD-NEXT: vabdu.vv v8, v12, v13 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vzext.vf2 v12, v8 +; ZVABD-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; ZVABD-NEXT: vabdu.vv v8, v9, v11 +; ZVABD-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVABD-NEXT: vwaddu.vv v16, v12, v14 +; ZVABD-NEXT: vzext.vf2 v10, v8 +; ZVABD-NEXT: vwaddu.wv v16, v16, v10 +; ZVABD-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVABD-NEXT: vmv.s.x v8, zero +; ZVABD-NEXT: vredsum.vs v8, v16, v8 +; ZVABD-NEXT: vmv.x.s a0, v8 +; ZVABD-NEXT: ret entry: %idx.ext8 = sext i32 %strideb to i64 %idx.ext = sext i32 %stridea to i64 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits