Author: Jan Patrick Lehr Date: 2025-03-07T13:13:26+01:00 New Revision: a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1
URL: https://github.com/llvm/llvm-project/commit/a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1 DIFF: https://github.com/llvm/llvm-project/commit/a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1.diff LOG: Revert "AMDGPU: Handle demanded subvectors for readfirstlane (#128648)" This reverts commit af755af2003464f1cb9268de86b34d373cc6ac2d. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index ef076814ffdab..ebe740f884ea6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1574,59 +1574,35 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded( const unsigned LastElt = DemandedElts.getActiveBits() - 1; const unsigned MaskLen = LastElt - FirstElt + 1; - unsigned OldNumElts = VT->getNumElements(); - if (MaskLen == OldNumElts && MaskLen != 1) + // TODO: Handle general subvector extract. + if (MaskLen != 1) return nullptr; Type *EltTy = VT->getElementType(); - Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen); - - // Theoretically we should support these intrinsics for any legal type. Avoid - // introducing cases that aren't direct register types like v3i16. - if (!isTypeLegal(NewVT)) + if (!isTypeLegal(EltTy)) return nullptr; Value *Src = II.getArgOperand(0); + assert(FirstElt == LastElt); + Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt); + // Make sure convergence tokens are preserved. // TODO: CreateIntrinsic should allow directly copying bundles SmallVector<OperandBundleDef, 2> OpBundles; II.getOperandBundlesAsDefs(OpBundles); Module *M = IC.Builder.GetInsertBlock()->getModule(); - Function *Remangled = - Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT}); - - if (MaskLen == 1) { - Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt); - - // TODO: Preserve callsite attributes? - CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles); - - return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), - NewCall, FirstElt); - } - - SmallVector<int> ExtractMask(MaskLen, -1); - for (unsigned I = 0; I != MaskLen; ++I) { - if (DemandedElts[FirstElt + I]) - ExtractMask[I] = FirstElt + I; - } - - Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask); + Function *Remangled = Intrinsic::getOrInsertDeclaration( + M, II.getIntrinsicID(), {Extract->getType()}); // TODO: Preserve callsite attributes? CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles); - SmallVector<int> InsertMask(OldNumElts, -1); - for (unsigned I = 0; I != MaskLen; ++I) { - if (DemandedElts[FirstElt + I]) - InsertMask[FirstElt + I] = I; - } - // FIXME: If the call has a convergence bundle, we end up leaving the dead // call behind. - return IC.Builder.CreateShuffleVector(NewCall, InsertMask); + return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), NewCall, + FirstElt); } std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic( diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll index ec645a7ff4519..e9d3b5e963b35 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll @@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] ; %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) @@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2> -; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 2> ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] ; %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) @@ -88,8 +88,8 @@ define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt23_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> -; CHECK-NEXT: [[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] ; %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) @@ -100,9 +100,8 @@ define <2 x i16> @extract_elt23_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt10_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 1, i32 0> ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] ; %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) @@ -113,9 +112,7 @@ define <2 x i16> @extract_elt10_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt32_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2> ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]] ; @@ -261,8 +258,8 @@ define <3 x i16> @extract_elt123_v4i16_readfirstlane(<4 x i16> %src) { define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) { ; CHECK-LABEL: define <3 x i32> @extract_elt012_v4i32_readfirstlane( ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> -; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]]) +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> ; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] ; %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) @@ -273,8 +270,8 @@ define <3 x i32> @extract_elt012_v4i32_readfirstlane(<4 x i32> %src) { define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) { ; CHECK-LABEL: define <3 x i32> @extract_elt123_v4i32_readfirstlane( ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3> -; CHECK-NEXT: [[SHUFFLE:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]]) +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3> ; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] ; %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src) @@ -285,9 +282,7 @@ define <3 x i32> @extract_elt123_v4i32_readfirstlane(<4 x i32> %src) { define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) { ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane( ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]]) -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[SRC]]) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] ; @@ -326,9 +321,8 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0, ; CHECK-LABEL: define <2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1( ; CHECK-SAME: i32 [[SRC0:%.*]], i32 [[SRC2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> -; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> +; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] ; @@ -371,10 +365,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc ; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry() ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0 ; CHECK-NEXT: [[INS_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 0> -; CHECK-NEXT: [[VEC1:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0> -; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ] -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2> +; CHECK-NEXT: [[VEC:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[INS_1]]) [ "convergencectrl"(token [[T]]) ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3> ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] ; @@ -413,9 +404,7 @@ define <2 x i1> @extract_elt01_v4i1_readfirstlane(<4 x i1> %src) { define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) { ; CHECK-LABEL: define <2 x i32> @extract_elt13_v8i32_readfirstlane( ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]]) -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3> ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]] ; @@ -439,9 +428,7 @@ define <2 x i32> @extract_elt03_v4i32_readfirstlane(<4 x i32> %src) { define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) { ; CHECK-LABEL: define <3 x i32> @extract_elt124_v8i32_readfirstlane( ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4> -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]]) -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison> +; CHECK-NEXT: [[VEC:%.*]] = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> [[SRC]]) ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4> ; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]] ; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits