kmclaughlin updated this revision to Diff 226640.
kmclaughlin added a reviewer: sdesmalen.
kmclaughlin added a comment.
- Split functions in sve-masked-ldst-nonext.ll into separate load & store tests

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69378/new/

https://reviews.llvm.org/D69378

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
  llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-trunc.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8(<vscale x 2 x i64> *%a, <vscale x 2 x i8> *%b, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].d }, [[PG]], [x1]
+  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  %trunc = trunc <vscale x 2 x i64> %load to <vscale x 2 x i8>
+  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %trunc, <vscale x 2 x i8> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16(<vscale x 2 x i64> *%a, <vscale x 2 x i16> *%b, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].d }, [[PG]], [x1]
+  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  %trunc = trunc <vscale x 2 x i64> %load to <vscale x 2 x i16>
+  call void @llvm.masked.store.nxv2i16(<vscale x 2 x i16> %trunc, <vscale x 2 x i16> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32(<vscale x 2 x i64> *%a, <vscale x 2 x i32> *%b, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK: ld1d { [[IN:z[0-9]]].d }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1w { [[IN]].d }, [[PG]], [x1]
+  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  %trunc = trunc <vscale x 2 x i64> %load to <vscale x 2 x i32>
+  call void @llvm.masked.store.nxv2i32(<vscale x 2 x i32> %trunc, <vscale x 2 x i32> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8(<vscale x 4 x i32> *%a, <vscale x 4 x i8> *%b, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].s }, [[PG]], [x1]
+  %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  %trunc = trunc <vscale x 4 x i32> %load to <vscale x 4 x i8>
+  call void @llvm.masked.store.nxv4i8(<vscale x 4 x i8> %trunc, <vscale x 4 x i8> *%b, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16(<vscale x 4 x i32> *%a, <vscale x 4 x i16> *%b, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK: ld1w { [[IN:z[0-9]]].s }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1h { [[IN]].s }, [[PG]], [x1]
+  %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  %trunc = trunc <vscale x 4 x i32> %load to <vscale x 4 x i16>
+  call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %trunc, <vscale x 4 x i16> *%b, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8(<vscale x 8 x i16> *%a, <vscale x 8 x i8> *%b, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK: ld1h { [[IN:z[0-9]]].h }, [[PG:p[0-9]]]/z, [x0]
+; CHECK: st1b { [[IN]].h }, [[PG]], [x1]
+  %load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
+  %trunc = trunc <vscale x 8 x i16> %load to <vscale x 8 x i8>
+  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, <vscale x 8 x i8> *%b, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+
+declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>*, i32, <vscale x 8 x i1>)
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -74,6 +74,80 @@
   ret <vscale x 8 x half> %load
 }
 
+;
+; Masked Stores
+;
+
+define void @masked_store_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv2i64:
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv4i32:
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i16> %val, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv8i16:
+; CHECK: st1h { [[IN]].h }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i8> %val, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv16i8:
+; CHECK: st1b { [[IN]].b }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv16i8(<vscale x 16 x i8> %val, <vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x double> %val, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv2f64:
+; CHECK: st1d { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x float> %val, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv2f32:
+; CHECK: st1w { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x half> %val, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv2f16:
+; CHECK: st1h { [[IN]].d }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half> *%a, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x float> %val, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv4f32:
+; CHECK: st1w { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x half> %val, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv4f16:
+; CHECK: st1h { [[IN]].s }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x half> %val, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_store_nxv8f16:
+; CHECK: st1h { [[IN]].h }, [[PG]], [x0]
+  call void @llvm.masked.store.nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
 declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
@@ -85,3 +159,15 @@
 declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
 declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+
+declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.store.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32, <vscale x 16 x i1>)
+
+declare void @llvm.masked.store.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -147,7 +147,7 @@
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
 
-  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+  bool isLegalMaskedLoadStore(Type *DataType, MaybeAlign Alignment) {
     if (!isa<VectorType>(DataType) || !ST->hasSVE())
       return false;
 
@@ -162,6 +162,14 @@
     return false;
   }
 
+  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+    return isLegalMaskedLoadStore(DataType, Alignment);
+  }
+
+  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+    return isLegalMaskedLoadStore(DataType, Alignment);
+  }
+
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1108,6 +1108,36 @@
   // 16-element contiguous loads
   defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B_IMM>;
 
+  multiclass pred_store<ValueType Ty, ValueType PredTy, SDPatternOperator Store,
+                        Instruction RegImmInst> {
+    def _default : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp)),
+                       (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;
+  }
+
+  // 2-element contiguous stores
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+  defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D_IMM>;
+  defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D_IMM>;
+  defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+
+  // 4-element contiguous stores
+  defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S_IMM>;
+  defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S_IMM>;
+  defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+  defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S_IMM>;
+  defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+
+  // 8-element contiguous stores
+  defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;
+  defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+  defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+
+  // 16-element contiguous stores
+  defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+
 }
 
 let Predicates = [HasSVE2] in {
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -309,6 +309,34 @@
   return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+// non-truncating masked store fragment.
+def nontrunc_masked_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, node:$pred), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+// truncating masked store fragments.
+def trunc_masked_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def trunc_masked_store_i8 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def trunc_masked_store_i16 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def trunc_masked_store_i32 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 // Node definitions.
 def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;