Author: Francesco Petrogalli
Date: 2020-07-31T17:27:52+02:00
New Revision: 6b66be512110acb2dfdab00d9755d86e185f1e3d
URL: https://github.com/llvm/llvm-project/commit/6b66be512110acb2dfdab00d9755d86e185f1e3d
DIFF: https://github.com/llvm/llvm-project/commit/6b66be512110acb2dfdab00d9755d86e185f1e3d.diff

LOG: [llvm][sve] Reg + Imm addressing mode for ld1ro.

Reviewers: kmclaughlin, efriedma, sdesmalen

Subscribers: tschuett, hiraditya, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83357

(cherry picked from commit 809600d6642773f71245f76995dab355effc73af)

Added:
    llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll

Modified:
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed:


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2c992c07fad9..1500da2fdfc7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12301,6 +12301,9 @@ static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
          "Unsupported opcode.");
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
+  if (VT == MVT::nxv8bf16 &&
+      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
+    return SDValue();
 
   EVT LoadVT = VT;
   if (VT.isFloatingPoint())

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6df7970f4d82..4f4ba692c2db 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -495,6 +495,9 @@ def SImmS4XForm : SDNodeXForm<imm, [{
 def SImmS16XForm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue() / 16, SDLoc(N), MVT::i64);
 }]>;
+def SImmS32XForm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue() / 32, SDLoc(N), MVT::i64);
+}]>;
 
 // simm6sN predicate - True if the immediate is a multiple of N in the range
 // [-32 * N, 31 * N].
@@ -546,7 +549,7 @@ def simm4s16 : Operand<i64>, ImmLeaf<i64,
   let DecoderMethod = "DecodeSImm<4>";
 }
 def simm4s32 : Operand<i64>, ImmLeaf<i64,
-[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }]> {
+[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }], SImmS32XForm> {
   let PrintMethod = "printImmScale<32>";
   let ParserMatchClass = SImm4s32Operand;
   let DecoderMethod = "DecodeSImm<4>";

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a005d1e65abe..c56a65b9e212 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7718,9 +7718,13 @@ multiclass sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand listty,
                   (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), 0>;
 
   // Base addressing mode
-  def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), GPR64sp:$base)),
-            (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, (i64 0))>;
-
+  def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), GPR64sp:$base)),
+            (!cast<Instruction>(NAME) PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
+  let AddedComplexity = 2 in {
+    // Reg + Imm addressing mode
+    def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), (add GPR64:$base, (i64 simm4s32:$imm)))),
+              (!cast<Instruction>(NAME) $Pg, $base, simm4s32:$imm)>;
+  }
 }
 
 class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
new file mode 100644
index 000000000000..e7edfc9d6bdd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll
@@ -0,0 +1,174 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
+
+;
+; LD1ROB
+;
+
+define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+; CHECK-LABEL: ld1rob_i8:
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, #32]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, i8* %a, i64 32
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  ret <vscale x 16 x i8> %load
+}
+
+;
+; LD1ROH
+;
+
+define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a) nounwind {
+; CHECK-LABEL: ld1roh_i16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+  %base = getelementptr i16, i16* %a, i64 32
+  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
+  ret <vscale x 8 x i16> %load
+}
+
+define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a) nounwind {
+; CHECK-LABEL: ld1roh_f16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+  %base = getelementptr half, half* %a, i64 32
+  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
+  ret <vscale x 8 x half> %load
+}
+
+define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a) nounwind #0 {
+; CHECK-LABEL: ld1roh_bf16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, #64]
+; CHECK-NEXT: ret
+  %base = getelementptr bfloat, bfloat* %a, i64 32
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
+  ret <vscale x 8 x bfloat> %load
+}
+
+;
+; LD1ROW
+;
+
+define <vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a) nounwind {
+; CHECK-LABEL: ld1row_i32:
+; CHECK-NEXT: ld1row { z0.s }, p0/z, [x0, #128]
+; CHECK-NEXT: ret
+  %base = getelementptr i32, i32* %a, i64 32
+  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a) nounwind {
+; CHECK-LABEL: ld1row_f32:
+; CHECK-NEXT: ld1row { z0.s }, p0/z, [x0, #128]
+; CHECK-NEXT: ret
+  %base = getelementptr float, float* %a, i64 32
+  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
+  ret <vscale x 4 x float> %load
+}
+
+;
+; LD1ROD
+;
+
+define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a) nounwind {
+; CHECK-LABEL: ld1rod_i64:
+; CHECK-NEXT: ld1rod { z0.d }, p0/z, [x0, #-64]
+; CHECK-NEXT: ret
+  %base = getelementptr i64, i64* %a, i64 -8
+  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
+  ret <vscale x 2 x i64> %load
+}
+
+define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a) nounwind {
+; CHECK-LABEL: ld1rod_f64:
+; CHECK-NEXT: ld1rod { z0.d }, p0/z, [x0, #-128]
+; CHECK-NEXT: ret
+  %base = getelementptr double, double* %a, i64 -16
+  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
+  ret <vscale x 2 x double> %load
+}
+
+
+;;;;;;;;;;;;;;
+; range checks: immediate must be a multiple of 32 in the range -256, ..., 224
+
+; lower bound
+define <vscale x 16 x i8> @ld1rob_i8_lower_bound(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+; CHECK-LABEL: ld1rob_i8_lower_bound:
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, #-256]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, i8* %a, i64 -256
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  ret <vscale x 16 x i8> %load
+}
+
+; below lower bound
+define <vscale x 8 x i16> @ld1roh_i16_below_lower_bound(<vscale x 8 x i1> %pg, i16* %a) nounwind {
+; CHECK-LABEL: ld1roh_i16_below_lower_bound:
+; CHECK-NEXT: sub x[[BASE:[0-9]+]], x0, #258
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x[[BASE]]]
+; CHECK-NEXT: ret
+  %base = getelementptr i16, i16* %a, i64 -129
+  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
+  ret <vscale x 8 x i16> %load
+}
+
+define <vscale x 16 x i8> @ld1rob_i8_below_lower_bound_01(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+; CHECK-LABEL: ld1rob_i8_below_lower_bound_01:
+; CHECK-NEXT: mov x[[OFFSET:[0-9]+]], #-257
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, x[[OFFSET]]]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, i8* %a, i64 -257
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  ret <vscale x 16 x i8> %load
+}
+
+; not a multiple of 32
+define <vscale x 4 x i32> @ld1row_i32_not_multiple(<vscale x 4 x i1> %pg, i32* %a) nounwind {
+; CHECK-LABEL: ld1row_i32_not_multiple:
+; CHECK-NEXT: add x[[BASE:[0-9]+]], x0, #12
+; CHECK-NEXT: ld1row { z0.s }, p0/z, [x[[BASE]]]
+; CHECK-NEXT: ret
+  %base = getelementptr i32, i32* %a, i64 3
+  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
+  ret <vscale x 4 x i32> %load
+}
+
+; upper bound
+define <vscale x 2 x i64> @ld1rod_i64_upper_bound(<vscale x 2 x i1> %pg, i64* %a) nounwind {
+; CHECK-LABEL: ld1rod_i64_upper_bound:
+; CHECK-NEXT: ld1rod { z0.d }, p0/z, [x0, #224]
+; CHECK-NEXT: ret
+  %base = getelementptr i64, i64* %a, i64 28
+  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
+  ret <vscale x 2 x i64> %load
+}
+
+define <vscale x 16 x i8> @ld1rob_i8_beyond_upper_bound(<vscale x 16 x i1> %pg, i8* %a) nounwind {
+; CHECK-LABEL: ld1rob_i8_beyond_upper_bound:
+; CHECK-NEXT: mov w[[OFFSET:[0-9]+]], #225
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, x[[OFFSET]]]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, i8* %a, i64 225
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  ret <vscale x 16 x i8> %load
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1>, i8*)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1>, float*)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1>, double*)
+
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+f64mm,+bf16" }
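For readers following the operand changes above: simm4s32 only matches byte
offsets that are multiples of 32 in [-256, 224], and the new SImmS32XForm
divides a matched offset by 32 to produce the signed 4-bit immediate that
ld1ro encodes. A minimal standalone C++ sketch of that check and scaling
(hypothetical helper names, not LLVM API):

  #include <cassert>
  #include <cstdint>

  // Mirrors the simm4s32 ImmLeaf predicate: the byte offset must be a
  // multiple of 32 in [-256, 224] to be encodable.
  bool isValidLd1roOffset(int64_t Offset) {
    return Offset >= -256 && Offset <= 224 && (Offset % 32) == 0;
  }

  // Mirrors SImmS32XForm: scale the byte offset down to the signed 4-bit
  // field (values in [-8, 7]) that the instruction encodes.
  int64_t encodeLd1roImm4(int64_t Offset) {
    assert(isValidLd1roOffset(Offset) && "offset not encodable in imm4");
    return Offset / 32;
  }

Offsets that fail the predicate are not folded into the immediate and fall
back to a separate address computation, as the range-check tests verify.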
