llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

<details>
<summary>Changes</summary>

Some upcoming intrinsics will be using these new types

---

Patch is 120.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70484.diff

17 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+69-18)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+10)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll (+8-8)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/fadd.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll (+4-4)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/fma.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/fmul.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/fsub.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/mul.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll (-8)
- (modified) llvm/test/CodeGen/AMDGPU/inlineasm-v16.ll (+22)
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+254-250)
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+236-220)


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index adf4e0139e03c1d..4bf68d293425621 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -384,7 +384,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
        MVT::v12f32, MVT::v16f16, MVT::v16i16, MVT::v16f32, MVT::v16i32,
        MVT::v32f32, MVT::v32i32, MVT::v2f64, MVT::v2i64, MVT::v3f64,
        MVT::v3i64, MVT::v4f64, MVT::v4i64, MVT::v8f64, MVT::v8i64,
-       MVT::v16f64, MVT::v16i64},
+       MVT::v16f64, MVT::v16i64, MVT::v32i16, MVT::v32f16},
       Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7d457edad0d5cdf..568f8078373fcfa 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -165,6 +165,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
     addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass);
     addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass);
+    addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
+    addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -275,7 +277,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                  MVT::v4f16, MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
                  MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, MVT::v8i16,
                  MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v16i64, MVT::v16f64,
-                 MVT::v32i32, MVT::v32f32}) {
+                 MVT::v32i32, MVT::v32f32, MVT::v32i16, MVT::v32f16}) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
@@ -554,7 +556,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMAD, MVT::f16, Legal);
 
     for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
-                   MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
+                   MVT::v8f16, MVT::v16i16, MVT::v16f16, MVT::v32i16,
+                   MVT::v32f16}) {
       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
         switch (Op) {
         case ISD::LOAD:
@@ -640,6 +643,16 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v16f16, Promote);
     AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32);
 
+    setOperationAction(ISD::LOAD, MVT::v32i16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32);
+    setOperationAction(ISD::LOAD, MVT::v32f16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32);
+
+    setOperationAction(ISD::STORE, MVT::v32i16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32);
+    setOperationAction(ISD::STORE, MVT::v32f16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32);
+
     setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
                        MVT::v2i32, Expand);
     setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
@@ -662,12 +675,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
 
     setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
-                       {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom);
+                       {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+                       Custom);
 
     setOperationAction({ISD::FMINNUM, ISD::FMAXNUM},
-                       {MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand);
+                       {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+                       Expand);
 
-    for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
+    for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
+                      MVT::v32i16, MVT::v32f16}) {
       setOperationAction(
           {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR},
           Vec16, Custom);
@@ -690,10 +706,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::VECTOR_SHUFFLE,
                        {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
-                        MVT::v16f16, MVT::v16i16},
+                        MVT::v16f16, MVT::v16i16, MVT::v32f16, MVT::v32i16},
                        Custom);
 
-    for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
+    for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16, MVT::v32i16})
       // Split vector operations.
       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB,
                          ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
@@ -701,7 +717,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                          ISD::SSUBSAT},
                         VT, Custom);
 
-    for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
+    for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
       // Split vector operations.
       setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
                          VT, Custom);
@@ -737,8 +753,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::SELECT,
                        {MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
-                        MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
-                       Custom);
+                        MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
+                        MVT::v32i16, MVT::v32f16}, Custom);
 
     setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
 
@@ -5107,7 +5123,7 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
   assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
          VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
          VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
-         VT == MVT::v32f32);
+         VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
 
   SDValue Lo, Hi;
   std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -5130,7 +5146,7 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
   assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
          VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
         VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
-         VT == MVT::v32f32);
+         VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
 
   SDValue Lo0, Hi0;
   std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -5897,7 +5913,8 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
   if (IsIEEEMode)
     return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
 
-  if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
+  if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
+      VT == MVT::v16f16)
     return splitBinaryVectorOp(Op, DAG);
   return Op;
 }
@@ -6415,7 +6432,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
     return Combined;
 
-  if (VecSize == 128 || VecSize == 256) {
+  if (VecSize == 128 || VecSize == 256 || VecSize == 512 ) {
     SDValue Lo, Hi;
     EVT LoVT, HiVT;
     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
@@ -6428,9 +6445,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
       Hi = DAG.getBitcast(HiVT,
                           DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
                                       V2, DAG.getConstant(1, SL, MVT::i32)));
-    } else {
-      assert(VecSize == 256);
-
+    } else if (VecSize == 256) {
       SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
       SDValue Parts[4];
       for (unsigned P = 0; P < 4; ++P) {
@@ -6442,6 +6457,20 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                             Parts[0], Parts[1]));
       Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
                                             Parts[2], Parts[3]));
+    } else {
+      assert(VecSize == 512);
+
+      SDValue V2 = DAG.getBitcast(MVT::v8i64, Vec);
+      SDValue Parts[8];
+      for (unsigned P = 0; P < 8; ++P) {
+        Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
+                               DAG.getConstant(P, SL, MVT::i32));
+      }
+
+      Lo = DAG.getBitcast(LoVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v4i64,
+                                            Parts[0], Parts[1], Parts[2], Parts[3]));
+      Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v4i64,
+                                            Parts[4], Parts[5],Parts[6], Parts[7]));
     }
 
     EVT IdxVT = Idx.getValueType();
@@ -6607,6 +6636,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
     return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
   }
 
+  if (VT == MVT::v32i16 || VT == MVT::v32f16) {
+    EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+                                     VT.getVectorNumElements() / 8);
+    MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
+
+    SmallVector<SDValue, 8> Parts[8];
+    for (unsigned I = 0, E = VT.getVectorNumElements() / 8; I != E; ++I) {
+      for (unsigned P = 0; P < 8; ++P)
+        Parts[P].push_back(Op.getOperand(I + P * E));
+    }
+    SDValue Casts[8];
+    for (unsigned P = 0; P < 8; ++P) {
+      SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
+      Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
+    }
+
+    SDValue Blend =
+        DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 8), SL, Casts);
+    return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
+  }
+
   assert(VT == MVT::v2f16 || VT == MVT::v2i16);
   assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
 
@@ -9507,7 +9557,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
+  if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256 ||
+      VT.getSizeInBits() == 512)
     return splitTernaryVectorOp(Op, DAG);
 
   assert(VT.getSizeInBits() == 64);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3cd0821b0f86c5f..e56269438472ee2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1619,6 +1619,16 @@ def : BitConvert <v12i32, v12f32, VReg_384>;
 def : BitConvert <v12f32, v12i32, VReg_384>;
 
 // 512-bit bitcast
+def : BitConvert <v32f16, v32i16, VReg_512>;
+def : BitConvert <v32i16, v32f16, VReg_512>;
+def : BitConvert <v32f16, v16i32, VReg_512>;
+def : BitConvert <v32f16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32f16, VReg_512>;
+def : BitConvert <v16i32, v32f16, VReg_512>;
+def : BitConvert <v32i16, v16i32, VReg_512>;
+def : BitConvert <v32i16, v16f32, VReg_512>;
+def : BitConvert <v16f32, v32i16, VReg_512>;
+def : BitConvert <v16i32, v32i16, VReg_512>;
 def : BitConvert <v16i32, v16f32, VReg_512>;
 def : BitConvert <v16f32, v16i32, VReg_512>;
 def : BitConvert <v8i64, v8f64, VReg_512>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 304ee53cc5a87da..7ea2280c474b05e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -930,7 +930,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
 defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;
 
 let GlobalPriority = true in {
-defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
+defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>;
 defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
 }
 
@@ -984,7 +984,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
 defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;
 
 let GlobalPriority = true in {
-defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
+defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>;
 defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
 }
 
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 2bebc5ed9b53bde..2a966b4ea178f27 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -76,7 +76,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
@@ -98,7 +98,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
index b57f26cdc2928be..564bc4912af7d30 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
@@ -57,8 +57,8 @@ define i32 @add(i32 %arg) {
 ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -115,8 +115,8 @@ define i32 @add(i32 %arg) {
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -237,8 +237,8 @@ define i32 @sub(i32 %arg) {
 ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -295,8 +295,8 @@ define i32 @sub(i32 %arg) {
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT: Cos...
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/70484
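For context, a minimal illustration of the kind of IR this patch makes directly selectable; this is a sketch, not taken from the patch's tests, and the kernel and value names are made up. With `v32i16`/`v32f16` added to the 512-bit register classes above, the `fadd` below would be split by `splitBinaryVectorOp` into two `v16f16` halves, and the load/store would be promoted to `v16i32`, per the hunks in SIISelLowering.cpp:

```llvm
; Illustrative sketch only: function and value names are hypothetical.
define amdgpu_kernel void @fadd_v32f16(ptr addrspace(1) %out,
                                       <32 x half> %a, <32 x half> %b) {
  ; FADD on v32f16 is marked Custom and split into two v16f16 halves.
  %r = fadd <32 x half> %a, %b
  ; The v32f16 store is promoted to a v16i32 store.
  store <32 x half> %r, ptr addrspace(1) %out
  ret void
}
```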