[llvm-branch-commits] [llvm] AMDGPU: Simplify demanded vector elts of readfirstlane sources (PR #128646)
arsenm wrote: ### Merge activity * **Feb 28, 12:52 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/128646). https://github.com/llvm/llvm-project/pull/128646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [MC/DC] Enable nested expressions (PR #125413)
@@ -275,49 +276,57 @@ struct MapRegionCounters : public RecursiveASTVisitor { // an AST Stmt node. MC/DC will use it to signal when the top of a // logical operation (boolean expression) nest is encountered. bool dataTraverseStmtPost(Stmt *S) { -/// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. -if (MCDCMaxCond == 0) +if (DecisionStack.empty()) return true; -if (const Expr *E = dyn_cast(S)) { - const BinaryOperator *BinOp = dyn_cast(E->IgnoreParens()); - if (BinOp && BinOp->isLogicalOp()) { -assert(LogOpStack.back() == BinOp); -LogOpStack.pop_back(); - -/// At the top of logical operator nest: -if (LogOpStack.empty()) { - /// Was the "split-nested" logical operator case encountered? - if (SplitNestedLogicalOp) { -unsigned DiagID = Diag.getCustomDiagID( -DiagnosticsEngine::Warning, -"unsupported MC/DC boolean expression; " -"contains an operation with a nested boolean expression. " -"Expression will not be covered"); -Diag.Report(S->getBeginLoc(), DiagID); -return true; - } - - /// Was the maximum number of conditions encountered? - if (NumCond > MCDCMaxCond) { -unsigned DiagID = Diag.getCustomDiagID( -DiagnosticsEngine::Warning, -"unsupported MC/DC boolean expression; " -"number of conditions (%0) exceeds max (%1). " -"Expression will not be covered"); -Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; -return true; - } - - // Otherwise, allocate the Decision. - MCDCState.DecisionByStmt[BinOp].ID = MCDCState.DecisionByStmt.size(); -} -return true; +/// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. 
+assert(MCDCMaxCond > 0); + +auto &StackTop = DecisionStack.back(); + +if (StackTop.DecisionExpr != S) { + if (StackTop.Leaves.contains(S)) { +assert(StackTop.Split); +StackTop.Split = false; } + + return true; +} + +/// Allocate the entry (with Valid=false) +auto &DecisionEntry = +MCDCState +.DecisionByStmt[CodeGenFunction::stripCond(StackTop.DecisionExpr)]; + +/// Was the "split-nested" logical operator case encountered? +if (false && DecisionStack.size() > 1) { chapuni wrote: This is the option for deprecating this warning, "warn if nested level >= n" or deprecate. @evodius96 Opinions? https://github.com/llvm/llvm-project/pull/125413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Handle demanded subvectors for readfirstlane (PR #128648)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/128648 >From ce66b73ac989b8f4d8ec03f704f2e72ee30a3b42 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 25 Feb 2025 12:51:44 +0700 Subject: [PATCH] AMDGPU: Handle demanded subvectors for readfirstlane --- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 47 + ...fy-demanded-vector-elts-lane-intrinsics.ll | 51 +++ 2 files changed, 68 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 70ccd7edce2ac..b232baa4eb7f5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1574,33 +1574,60 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded( const unsigned LastElt = DemandedElts.getActiveBits() - 1; const unsigned MaskLen = LastElt - FirstElt + 1; - // TODO: Handle general subvector extract. - if (MaskLen != 1) + unsigned OldNumElts = VT->getNumElements(); + if (MaskLen == OldNumElts && MaskLen != 1) return nullptr; Type *EltTy = VT->getElementType(); - if (!isTypeLegal(EltTy)) + Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen); + + // Theoretically we should support these intrinsics for any legal type. Avoid + // introducing cases that aren't direct register types like v3i16. + if (!isTypeLegal(NewVT)) return nullptr; Value *Src = II.getArgOperand(0); - assert(FirstElt == LastElt); - Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt); - // Make sure convergence tokens are preserved. 
// TODO: CreateIntrinsic should allow directly copying bundles SmallVector OpBundles; II.getOperandBundlesAsDefs(OpBundles); Module *M = IC.Builder.GetInsertBlock()->getModule(); - Function *Remangled = Intrinsic::getOrInsertDeclaration( - M, II.getIntrinsicID(), {Extract->getType()}); + Function *Remangled = + Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT}); + + if (MaskLen == 1) { +Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt); + +// TODO: Preserve callsite attributes? +CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles); + +Value *Result = IC.Builder.CreateInsertElement( +PoisonValue::get(II.getType()), NewCall, FirstElt); +IC.replaceInstUsesWith(II, Result); +IC.eraseInstFromFunction(II); +return Result; + } + + SmallVector ExtractMask(MaskLen, -1); + for (unsigned I = 0; I != MaskLen; ++I) { +if (DemandedElts[FirstElt + I]) + ExtractMask[I] = FirstElt + I; + } + + Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask); // TODO: Preserve callsite attributes? 
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles); - Value *Result = IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), - NewCall, FirstElt); + SmallVector InsertMask(OldNumElts, -1); + for (unsigned I = 0; I != MaskLen; ++I) { +if (DemandedElts[FirstElt + I]) + InsertMask[FirstElt + I] = I; + } + + Value *Result = IC.Builder.CreateShuffleVector(NewCall, InsertMask); IC.replaceInstUsesWith(II, Result); IC.eraseInstFromFunction(II); return Result; diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll index e9d3b5e963b35..056caabb6d60a 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll @@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT:[[VEC:%.*]] = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) -; CHECK-NEXT:[[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT:[[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT:[[SHUFFLE:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]]) ; CHECK-NEXT:ret <2 x i16> [[SHUFFLE]] ; %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src) @@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) { define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) { ; CHECK-LABEL: define <2 x i16> @extract_elt12_v4i16_readfirstlane( ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT:[[VEC:%.*]] = call <4 x i16> 
@llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]]) -; CHECK
[llvm-branch-commits] [clang] [MC/DC] Enable nested expressions (PR #125413)
@@ -228,45 +228,46 @@ struct MapRegionCounters : public RecursiveASTVisitor { /// The stacks are also used to find error cases and notify the user. A /// standard logical operator nest for a boolean expression could be in a form /// similar to this: "x = a && b && c && (d || f)" - unsigned NumCond = 0; - bool SplitNestedLogicalOp = false; - SmallVector NonLogOpStack; - SmallVector LogOpStack; + struct DecisionState { +llvm::DenseSet Leaves; // Not BinOp +const Expr *DecisionExpr;// Root +bool Split; ornata wrote: Can you document what splitting means in the code? https://github.com/llvm/llvm-project/pull/125413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [MC/DC] Enable nested expressions (PR #125413)
@@ -275,49 +276,57 @@ struct MapRegionCounters : public RecursiveASTVisitor { // an AST Stmt node. MC/DC will use it to signal when the top of a // logical operation (boolean expression) nest is encountered. bool dataTraverseStmtPost(Stmt *S) { -/// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. -if (MCDCMaxCond == 0) +if (DecisionStack.empty()) return true; -if (const Expr *E = dyn_cast(S)) { - const BinaryOperator *BinOp = dyn_cast(E->IgnoreParens()); - if (BinOp && BinOp->isLogicalOp()) { -assert(LogOpStack.back() == BinOp); -LogOpStack.pop_back(); - -/// At the top of logical operator nest: -if (LogOpStack.empty()) { - /// Was the "split-nested" logical operator case encountered? - if (SplitNestedLogicalOp) { -unsigned DiagID = Diag.getCustomDiagID( -DiagnosticsEngine::Warning, -"unsupported MC/DC boolean expression; " -"contains an operation with a nested boolean expression. " -"Expression will not be covered"); -Diag.Report(S->getBeginLoc(), DiagID); -return true; - } - - /// Was the maximum number of conditions encountered? - if (NumCond > MCDCMaxCond) { -unsigned DiagID = Diag.getCustomDiagID( -DiagnosticsEngine::Warning, -"unsupported MC/DC boolean expression; " -"number of conditions (%0) exceeds max (%1). " -"Expression will not be covered"); -Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; -return true; - } - - // Otherwise, allocate the Decision. - MCDCState.DecisionByStmt[BinOp].ID = MCDCState.DecisionByStmt.size(); -} -return true; +/// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. 
+assert(MCDCMaxCond > 0); + +auto &StackTop = DecisionStack.back(); + +if (StackTop.DecisionExpr != S) { + if (StackTop.Leaves.contains(S)) { +assert(StackTop.Split); +StackTop.Split = false; } + + return true; +} + +/// Allocate the entry (with Valid=false) +auto &DecisionEntry = +MCDCState +.DecisionByStmt[CodeGenFunction::stripCond(StackTop.DecisionExpr)]; + +/// Was the "split-nested" logical operator case encountered? +if (false && DecisionStack.size() > 1) { ornata wrote: if (false)? https://github.com/llvm/llvm-project/pull/125413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)
@@ -179,21 +179,45 @@ createBufferHandleType(const HLSLBufferDecl *BufDecl) { return cast(QT.getTypePtr()); } +// Iterates over all declarations in the HLSL buffer and based on the +// packoffset or register(c#) annotations it fills out the Layout +// vector with the user-specified layout offsets. +// The buffer offsets can be specified 2 ways: +// 1. declarations in cbuffer {} block can have a packoffset annotation +//(translates to HLSLPackOffsetAttr) +// 2. default constant buffer declarations at global scope can have +//register(c#) annotations (translates to HLSLResourceBindingAttr with +//RegisterType::C) +// It is not guaranteed that all declarations in a buffer have an annotation. +// For those where it is not specified a -1 value is added to the Layout +// vector. In the final layout these declarations will be placed at the end +// of the HLSL buffer after all of the elements with specified offset. static void fillPackoffsetLayout(const HLSLBufferDecl *BufDecl, - SmallVector &Layout) { + SmallVector &Layout) { assert(Layout.empty() && "expected empty vector for layout"); assert(BufDecl->hasValidPackoffset()); - for (Decl *D : BufDecl->decls()) { + for (Decl *D : BufDecl->buffer_decls()) { if (isa(D) || isa(D)) { continue; } VarDecl *VD = dyn_cast(D); if (!VD || VD->getType().getAddressSpace() != LangAS::hlsl_constant) continue; -assert(VD->hasAttr() && - "expected packoffset attribute on every declaration"); -size_t Offset = VD->getAttr()->getOffsetInBytes(); +size_t Offset = -1; +if (VD->hasAttrs()) { + for (auto *Attr : VD->getAttrs()) { +if (auto *POA = dyn_cast(Attr)) { + Offset = POA->getOffsetInBytes(); +} else if (auto *RBA = dyn_cast(Attr)) { + if (RBA->getRegisterType() == + HLSLResourceBindingAttr::RegisterType::C) { +// size of constant buffer row is 16 bytes +Offset = RBA->getSlotNumber() * 16U; hekota wrote: I've named the constant and placed it in CGHLSLRuntime. Codegen does not have a dependency on Sema. 
https://github.com/llvm/llvm-project/pull/128991 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/128977 >From 0fe2ba3242026457d8afc46c4a3338efd941c42f Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:12:43 -0800 Subject: [PATCH 1/3] fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dbd24547b2304..dc3b253237e51 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3623,7 +3623,6 @@ void CodeGenFunction::EmitCheck( llvm::Value *RecoverableCond = nullptr; llvm::Value *TrapCond = nullptr; bool NoMerge = false; - // Expand checks into: // (Check1 || !allow_ubsan_check) && (Check2 || !allow_ubsan_check) ... // We need separate allow_ubsan_check intrinsics because they have separately @@ -3933,6 +3932,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapBBs.resize(CheckHandlerID + 1); llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; + NoMerge = NoMerge || !CGM.getCodeGenOpts().OptimizationLevel || (CurCodeDecl && CurCodeDecl->hasAttr()); >From a16b7a8c48353226fe1323a45f59cd4167ddc3d4 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:15:20 -0800 Subject: [PATCH 2/3] rename & fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGDebugInfo.cpp | 7 --- clang/lib/CodeGen/CGDebugInfo.h| 8 +--- clang/lib/CodeGen/CGExpr.cpp | 5 ++--- clang/test/CodeGen/bounds-checking-debuginfo.c | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ae19e8f724314..35fd78b15ff30 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3598,13 +3598,14 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, return DBuilder.createTempMacroFile(Parent, Line, FName); } -llvm::DILocation *CGDebugInfo::CreateSyntheticInline( -llvm::DebugLoc 
TrapLocation, StringRef FuncName) { +llvm::DILocation * +CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName) { llvm::DISubprogram *TrapSP = createInlinedTrapSubprogram(FuncName, TrapLocation->getFile()); return llvm::DILocation::get(CGM.getLLVMContext(), /*Line=*/0, /*Column=*/0, /*Scope=*/TrapSP, /*InlinedAt=*/TrapLocation); -} +} llvm::DILocation *CGDebugInfo::CreateTrapFailureMessageFor( llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 0b06bdf78ac78..d01ad3b3d8df5 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -638,9 +638,11 @@ class CGDebugInfo { /// Create a debug location from `TrapLocation` that adds an artificial inline /// frame where the frame name is FuncName /// - /// This is used to indiciate instructions that come from compiler instrumentation. - llvm::DILocation *CreateSyntheticInline( - llvm::DebugLoc TrapLocation, StringRef FuncName); + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName); + private: /// Emit call to llvm.dbg.declare for a variable declaration. 
/// Returns a pointer to the DILocalVariable associated with the diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dc3b253237e51..d5cc2cc69c921 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1219,10 +1219,9 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, llvm::DILocation *TrapSP = Builder.getCurrentDebugLocation(); if (TrapSP) { TrapSP = getDebugInfo()->CreateSyntheticInline( - Builder.getCurrentDebugLocation(), - "check_array_bounds"); +Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds"); } - ApplyDebugLocation ApplyTrapDI(*this, TrapSP); + ApplyDebugLocation ApplyTrapDI(*this, TrapSP); bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); diff --git a/clang/test/CodeGen/bounds-checking-debuginfo.c b/clang/test/CodeGen/bounds-checking-debuginfo.c index e2a604bc962ba..58fcc89058d72 100644 --- a/clang/test/CodeGen/bounds-checking-debuginfo.c +++ b/clang/test/CodeGen/bounds-checking-debuginfo.c @@ -89,7 +89,7 @@ double f1(int b, int i) { // CHECK-TRAP: [[DBG22]] = !DILocation(line: 65, column: 3, scope: [[DBG5]]) // CHECK-TRAP: [[DBG23]] = !DILocation(line: 66, column: 12, scope: [[DBG5]]) // CHECK-TRAP: [[DBG24]] = !DILocation(line: 0, scope: [[META
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/128977 >From 0fe2ba3242026457d8afc46c4a3338efd941c42f Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:12:43 -0800 Subject: [PATCH 1/3] fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dbd24547b2304..dc3b253237e51 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3623,7 +3623,6 @@ void CodeGenFunction::EmitCheck( llvm::Value *RecoverableCond = nullptr; llvm::Value *TrapCond = nullptr; bool NoMerge = false; - // Expand checks into: // (Check1 || !allow_ubsan_check) && (Check2 || !allow_ubsan_check) ... // We need separate allow_ubsan_check intrinsics because they have separately @@ -3933,6 +3932,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapBBs.resize(CheckHandlerID + 1); llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; + NoMerge = NoMerge || !CGM.getCodeGenOpts().OptimizationLevel || (CurCodeDecl && CurCodeDecl->hasAttr()); >From a16b7a8c48353226fe1323a45f59cd4167ddc3d4 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:15:20 -0800 Subject: [PATCH 2/3] rename & fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGDebugInfo.cpp | 7 --- clang/lib/CodeGen/CGDebugInfo.h| 8 +--- clang/lib/CodeGen/CGExpr.cpp | 5 ++--- clang/test/CodeGen/bounds-checking-debuginfo.c | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ae19e8f724314..35fd78b15ff30 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3598,13 +3598,14 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, return DBuilder.createTempMacroFile(Parent, Line, FName); } -llvm::DILocation *CGDebugInfo::CreateSyntheticInline( -llvm::DebugLoc 
TrapLocation, StringRef FuncName) { +llvm::DILocation * +CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName) { llvm::DISubprogram *TrapSP = createInlinedTrapSubprogram(FuncName, TrapLocation->getFile()); return llvm::DILocation::get(CGM.getLLVMContext(), /*Line=*/0, /*Column=*/0, /*Scope=*/TrapSP, /*InlinedAt=*/TrapLocation); -} +} llvm::DILocation *CGDebugInfo::CreateTrapFailureMessageFor( llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 0b06bdf78ac78..d01ad3b3d8df5 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -638,9 +638,11 @@ class CGDebugInfo { /// Create a debug location from `TrapLocation` that adds an artificial inline /// frame where the frame name is FuncName /// - /// This is used to indiciate instructions that come from compiler instrumentation. - llvm::DILocation *CreateSyntheticInline( - llvm::DebugLoc TrapLocation, StringRef FuncName); + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName); + private: /// Emit call to llvm.dbg.declare for a variable declaration. 
/// Returns a pointer to the DILocalVariable associated with the diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dc3b253237e51..d5cc2cc69c921 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1219,10 +1219,9 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, llvm::DILocation *TrapSP = Builder.getCurrentDebugLocation(); if (TrapSP) { TrapSP = getDebugInfo()->CreateSyntheticInline( - Builder.getCurrentDebugLocation(), - "check_array_bounds"); +Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds"); } - ApplyDebugLocation ApplyTrapDI(*this, TrapSP); + ApplyDebugLocation ApplyTrapDI(*this, TrapSP); bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); diff --git a/clang/test/CodeGen/bounds-checking-debuginfo.c b/clang/test/CodeGen/bounds-checking-debuginfo.c index e2a604bc962ba..58fcc89058d72 100644 --- a/clang/test/CodeGen/bounds-checking-debuginfo.c +++ b/clang/test/CodeGen/bounds-checking-debuginfo.c @@ -89,7 +89,7 @@ double f1(int b, int i) { // CHECK-TRAP: [[DBG22]] = !DILocation(line: 65, column: 3, scope: [[DBG5]]) // CHECK-TRAP: [[DBG23]] = !DILocation(line: 66, column: 12, scope: [[DBG5]]) // CHECK-TRAP: [[DBG24]] = !DILocation(line: 0, scope: [[META
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/rampitec approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/129059 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)
@@ -179,21 +179,45 @@ createBufferHandleType(const HLSLBufferDecl *BufDecl) { return cast(QT.getTypePtr()); } +// Iterates over all declarations in the HLSL buffer and based on the +// packoffset or register(c#) annotations it fills out the Layout +// vector with the user-specified layout offsets. +// The buffer offsets can be specified 2 ways: +// 1. declarations in cbuffer {} block can have a packoffset annotation +//(translates to HLSLPackOffsetAttr) +// 2. default constant buffer declarations at global scope can have +//register(c#) annotations (translates to HLSLResourceBindingAttr with +//RegisterType::C) +// It is not guaranteed that all declarations in a buffer have an annotation. +// For those where it is not specified a -1 value is added to the Layout +// vector. In the final layout these declarations will be placed at the end +// of the HLSL buffer after all of the elements with specified offset. static void fillPackoffsetLayout(const HLSLBufferDecl *BufDecl, - SmallVector &Layout) { + SmallVector &Layout) { assert(Layout.empty() && "expected empty vector for layout"); assert(BufDecl->hasValidPackoffset()); - for (Decl *D : BufDecl->decls()) { + for (Decl *D : BufDecl->buffer_decls()) { if (isa(D) || isa(D)) { continue; } VarDecl *VD = dyn_cast(D); if (!VD || VD->getType().getAddressSpace() != LangAS::hlsl_constant) continue; -assert(VD->hasAttr() && - "expected packoffset attribute on every declaration"); -size_t Offset = VD->getAttr()->getOffsetInBytes(); +size_t Offset = -1; +if (VD->hasAttrs()) { + for (auto *Attr : VD->getAttrs()) { +if (auto *POA = dyn_cast(Attr)) { + Offset = POA->getOffsetInBytes(); +} else if (auto *RBA = dyn_cast(Attr)) { + if (RBA->getRegisterType() == + HLSLResourceBindingAttr::RegisterType::C) { +// size of constant buffer row is 16 bytes +Offset = RBA->getSlotNumber() * 16U; damyanp wrote: Thanks, I think this is probably fine here, but I wonder more generally if there's any way we can get Sema and CG to agree on constants 
and things like that? Does Sema depend on Codegen? https://github.com/llvm/llvm-project/pull/128991 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/128977 >From 0fe2ba3242026457d8afc46c4a3338efd941c42f Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:12:43 -0800 Subject: [PATCH 1/3] fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dbd24547b2304..dc3b253237e51 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3623,7 +3623,6 @@ void CodeGenFunction::EmitCheck( llvm::Value *RecoverableCond = nullptr; llvm::Value *TrapCond = nullptr; bool NoMerge = false; - // Expand checks into: // (Check1 || !allow_ubsan_check) && (Check2 || !allow_ubsan_check) ... // We need separate allow_ubsan_check intrinsics because they have separately @@ -3933,6 +3932,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapBBs.resize(CheckHandlerID + 1); llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; + NoMerge = NoMerge || !CGM.getCodeGenOpts().OptimizationLevel || (CurCodeDecl && CurCodeDecl->hasAttr()); >From a16b7a8c48353226fe1323a45f59cd4167ddc3d4 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:15:20 -0800 Subject: [PATCH 2/3] rename & fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGDebugInfo.cpp | 7 --- clang/lib/CodeGen/CGDebugInfo.h| 8 +--- clang/lib/CodeGen/CGExpr.cpp | 5 ++--- clang/test/CodeGen/bounds-checking-debuginfo.c | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ae19e8f724314..35fd78b15ff30 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3598,13 +3598,14 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, return DBuilder.createTempMacroFile(Parent, Line, FName); } -llvm::DILocation *CGDebugInfo::CreateSyntheticInline( -llvm::DebugLoc 
TrapLocation, StringRef FuncName) { +llvm::DILocation * +CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName) { llvm::DISubprogram *TrapSP = createInlinedTrapSubprogram(FuncName, TrapLocation->getFile()); return llvm::DILocation::get(CGM.getLLVMContext(), /*Line=*/0, /*Column=*/0, /*Scope=*/TrapSP, /*InlinedAt=*/TrapLocation); -} +} llvm::DILocation *CGDebugInfo::CreateTrapFailureMessageFor( llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 0b06bdf78ac78..d01ad3b3d8df5 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -638,9 +638,11 @@ class CGDebugInfo { /// Create a debug location from `TrapLocation` that adds an artificial inline /// frame where the frame name is FuncName /// - /// This is used to indiciate instructions that come from compiler instrumentation. - llvm::DILocation *CreateSyntheticInline( - llvm::DebugLoc TrapLocation, StringRef FuncName); + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName); + private: /// Emit call to llvm.dbg.declare for a variable declaration. 
/// Returns a pointer to the DILocalVariable associated with the diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dc3b253237e51..d5cc2cc69c921 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1219,10 +1219,9 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, llvm::DILocation *TrapSP = Builder.getCurrentDebugLocation(); if (TrapSP) { TrapSP = getDebugInfo()->CreateSyntheticInline( - Builder.getCurrentDebugLocation(), - "check_array_bounds"); +Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds"); } - ApplyDebugLocation ApplyTrapDI(*this, TrapSP); + ApplyDebugLocation ApplyTrapDI(*this, TrapSP); bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); diff --git a/clang/test/CodeGen/bounds-checking-debuginfo.c b/clang/test/CodeGen/bounds-checking-debuginfo.c index e2a604bc962ba..58fcc89058d72 100644 --- a/clang/test/CodeGen/bounds-checking-debuginfo.c +++ b/clang/test/CodeGen/bounds-checking-debuginfo.c @@ -89,7 +89,7 @@ double f1(int b, int i) { // CHECK-TRAP: [[DBG22]] = !DILocation(line: 65, column: 3, scope: [[DBG5]]) // CHECK-TRAP: [[DBG23]] = !DILocation(line: 66, column: 12, scope: [[DBG5]]) // CHECK-TRAP: [[DBG24]] = !DILocation(line: 0, scope: [[META
[llvm-branch-commits] [clang] [clang] [sanitizer] add pseudofunction to indicate array-bounds check (PR #128977)
https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/128977 >From 0fe2ba3242026457d8afc46c4a3338efd941c42f Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:12:43 -0800 Subject: [PATCH 1/3] fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dbd24547b2304..dc3b253237e51 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3623,7 +3623,6 @@ void CodeGenFunction::EmitCheck( llvm::Value *RecoverableCond = nullptr; llvm::Value *TrapCond = nullptr; bool NoMerge = false; - // Expand checks into: // (Check1 || !allow_ubsan_check) && (Check2 || !allow_ubsan_check) ... // We need separate allow_ubsan_check intrinsics because they have separately @@ -3933,6 +3932,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked, TrapBBs.resize(CheckHandlerID + 1); llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID]; + NoMerge = NoMerge || !CGM.getCodeGenOpts().OptimizationLevel || (CurCodeDecl && CurCodeDecl->hasAttr()); >From a16b7a8c48353226fe1323a45f59cd4167ddc3d4 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 26 Feb 2025 17:15:20 -0800 Subject: [PATCH 2/3] rename & fmt Created using spr 1.3.4 --- clang/lib/CodeGen/CGDebugInfo.cpp | 7 --- clang/lib/CodeGen/CGDebugInfo.h| 8 +--- clang/lib/CodeGen/CGExpr.cpp | 5 ++--- clang/test/CodeGen/bounds-checking-debuginfo.c | 4 ++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ae19e8f724314..35fd78b15ff30 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3598,13 +3598,14 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent, return DBuilder.createTempMacroFile(Parent, Line, FName); } -llvm::DILocation *CGDebugInfo::CreateSyntheticInline( -llvm::DebugLoc 
TrapLocation, StringRef FuncName) { +llvm::DILocation * +CGDebugInfo::CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName) { llvm::DISubprogram *TrapSP = createInlinedTrapSubprogram(FuncName, TrapLocation->getFile()); return llvm::DILocation::get(CGM.getLLVMContext(), /*Line=*/0, /*Column=*/0, /*Scope=*/TrapSP, /*InlinedAt=*/TrapLocation); -} +} llvm::DILocation *CGDebugInfo::CreateTrapFailureMessageFor( llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 0b06bdf78ac78..d01ad3b3d8df5 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -638,9 +638,11 @@ class CGDebugInfo { /// Create a debug location from `TrapLocation` that adds an artificial inline /// frame where the frame name is FuncName /// - /// This is used to indiciate instructions that come from compiler instrumentation. - llvm::DILocation *CreateSyntheticInline( - llvm::DebugLoc TrapLocation, StringRef FuncName); + /// This is used to indiciate instructions that come from compiler + /// instrumentation. + llvm::DILocation *CreateSyntheticInline(llvm::DebugLoc TrapLocation, + StringRef FuncName); + private: /// Emit call to llvm.dbg.declare for a variable declaration. 
/// Returns a pointer to the DILocalVariable associated with the diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index dc3b253237e51..d5cc2cc69c921 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1219,10 +1219,9 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, llvm::DILocation *TrapSP = Builder.getCurrentDebugLocation(); if (TrapSP) { TrapSP = getDebugInfo()->CreateSyntheticInline( - Builder.getCurrentDebugLocation(), - "check_array_bounds"); +Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds"); } - ApplyDebugLocation ApplyTrapDI(*this, TrapSP); + ApplyDebugLocation ApplyTrapDI(*this, TrapSP); bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); diff --git a/clang/test/CodeGen/bounds-checking-debuginfo.c b/clang/test/CodeGen/bounds-checking-debuginfo.c index e2a604bc962ba..58fcc89058d72 100644 --- a/clang/test/CodeGen/bounds-checking-debuginfo.c +++ b/clang/test/CodeGen/bounds-checking-debuginfo.c @@ -89,7 +89,7 @@ double f1(int b, int i) { // CHECK-TRAP: [[DBG22]] = !DILocation(line: 65, column: 3, scope: [[DBG5]]) // CHECK-TRAP: [[DBG23]] = !DILocation(line: 66, column: 12, scope: [[DBG5]]) // CHECK-TRAP: [[DBG24]] = !DILocation(line: 0, scope: [[META
[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)
https://github.com/hekota updated https://github.com/llvm/llvm-project/pull/128991 >From e982a61657da5eb4c7f2618c95f0c6d3493cb854 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Wed, 26 Feb 2025 19:14:20 -0800 Subject: [PATCH 1/3] [HLSL] Implement explicit layout for default constant buffer Fixes #123801 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 38 ++-- clang/lib/CodeGen/CGHLSLRuntime.h | 2 +- clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp | 87 +++ clang/lib/CodeGen/HLSLBufferLayoutBuilder.h | 7 +- clang/lib/CodeGen/TargetInfo.h| 2 +- clang/lib/CodeGen/Targets/DirectX.cpp | 4 +- clang/lib/CodeGen/Targets/SPIR.cpp| 4 +- clang/lib/Sema/SemaHLSL.cpp | 12 +++ .../CodeGenHLSL/cbuffer_with_packoffset.hlsl | 17 +++- .../default_cbuffer_with_layout.hlsl | 37 10 files changed, 175 insertions(+), 35 deletions(-) create mode 100644 clang/test/CodeGenHLSL/default_cbuffer_with_layout.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index ed6d2036cb984..6f476d7df4578 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -70,7 +70,7 @@ void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { llvm::Type * CGHLSLRuntime::convertHLSLSpecificType(const Type *T, - SmallVector *Packoffsets) { + SmallVector *Packoffsets) { assert(T->isHLSLSpecificType() && "Not an HLSL specific type!"); // Check if the target has a specific translation for this type first. @@ -179,21 +179,45 @@ createBufferHandleType(const HLSLBufferDecl *BufDecl) { return cast(QT.getTypePtr()); } +// Iterates over all declarations in the HLSL buffer and based on the +// packoffset or register(c#) annotations it fills outs the Layout +// vector with the user-specified layout offsets. +// The buffer offsets can be specified 2 ways: +// 1. declarations in cbuffer {} block can have a packoffset annotation +//(translates to HLSLPackOffsetAttr) +// 2. 
default constant buffer declarations at global scope can have +//register(c#) annotations (translates to HLSLResourceBindingAttr with +//RegisterType::C) +// It is not quaranteed that all declarations in a buffer have an annotation. +// For those where it is not specified a -1 value is added to the Layout +// vector. In the final layout these declarations will be placed at the end +// of the HLSL buffer after all of the elements with specified offset. static void fillPackoffsetLayout(const HLSLBufferDecl *BufDecl, - SmallVector &Layout) { + SmallVector &Layout) { assert(Layout.empty() && "expected empty vector for layout"); assert(BufDecl->hasValidPackoffset()); - for (Decl *D : BufDecl->decls()) { + for (Decl *D : BufDecl->buffer_decls()) { if (isa(D) || isa(D)) { continue; } VarDecl *VD = dyn_cast(D); if (!VD || VD->getType().getAddressSpace() != LangAS::hlsl_constant) continue; -assert(VD->hasAttr() && - "expected packoffset attribute on every declaration"); -size_t Offset = VD->getAttr()->getOffsetInBytes(); +size_t Offset = -1; +if (VD->hasAttrs()) { + for (auto *Attr : VD->getAttrs()) { +if (auto *POA = dyn_cast(Attr)) { + Offset = POA->getOffsetInBytes(); +} else if (auto *RBA = dyn_cast(Attr)) { + if (RBA->getRegisterType() == + HLSLResourceBindingAttr::RegisterType::C) { +// size of constant buffer row is 16 bytes +Offset = RBA->getSlotNumber() * 16U; + } +} + } +} Layout.push_back(Offset); } } @@ -212,7 +236,7 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { return; // create global variable for the constant buffer - SmallVector Layout; + SmallVector Layout; if (BufDecl->hasValidPackoffset()) fillPackoffsetLayout(BufDecl, Layout); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index a9da42324a038..c4550056175c1 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -146,7 +146,7 @@ class CGHLSLRuntime { llvm::Type * convertHLSLSpecificType(const Type *T, - SmallVector 
*Packoffsets = nullptr); + SmallVector *Packoffsets = nullptr); void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV); void generateGlobalCtorDtorCalls(); diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp index 97262b76c0164..bf9bca48a4dd6 100644 --- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp @@ -10,6 +10,7 @@ #include "CGHLSLRuntime.h" #include "CodeGenModule.
[llvm-branch-commits] [llvm] release/20.x: Reland "[LV]: Teach LV to recursively (de)interleave." (#125094) (PR #128389)
nikic wrote: I think this is something that @fhahn as the LoopVectorize maintainer should decide. I personally still don't see why this backport is necessary. https://github.com/llvm/llvm-project/pull/128389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Fix merge-fdata for memory events (PR #128108)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/128108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Fix merge-fdata for memory events (PR #128108)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/128108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/129059 >From b221f64e931ffd8ae0a6b288d8c192f80f851876 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Feb 2025 20:40:52 +0700 Subject: [PATCH] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++--- ...si-fold-operands-agpr-copy-reg-sequence.mir | 18 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6cb6863068b5f..eb9aabf8b6317 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); } - auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp) - .addReg(Vgpr); + Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); + BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr); B.addReg(Tmp); } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir index f45b35e239587..9d167f578e9eb 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -206,11 +206,11 @@ body: | ; 
CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 @@ -232,10 +232,10 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec 
-; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-N
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/129059 >From b221f64e931ffd8ae0a6b288d8c192f80f851876 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Feb 2025 20:40:52 +0700 Subject: [PATCH] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++--- ...si-fold-operands-agpr-copy-reg-sequence.mir | 18 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6cb6863068b5f..eb9aabf8b6317 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); } - auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp) - .addReg(Vgpr); + Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); + BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr); B.addReg(Tmp); } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir index f45b35e239587..9d167f578e9eb 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -206,11 +206,11 @@ body: | ; 
CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 @@ -232,10 +232,10 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec 
-; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-N
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/129058 >From 6898b936d27a6cc5dd8c0c4c8b45f8b359188f5b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Feb 2025 22:59:31 +0700 Subject: [PATCH 1/2] AMDGPU: Add mir test for agpr constant reg_sequence handling --- ...i-fold-operands-agpr-copy-reg-sequence.mir | 559 ++ 1 file changed, 559 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir new file mode 100644 index 0..95112826b7112 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -0,0 +1,559 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-fold-operands -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s + +--- +name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 0 +%1:vgpr_32 = COPY killed %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 +%3:areg_128 = COPY %2 +$agpr0_agpr1_agpr2_agpr3 = COPY %3 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 
+ +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]] +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +S_NOP 0, implicit %1 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... 
+ +--- +name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 999 +%1:vgpr_32 = COPY %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subre
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/129058 >From 6898b936d27a6cc5dd8c0c4c8b45f8b359188f5b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Feb 2025 22:59:31 +0700 Subject: [PATCH 1/2] AMDGPU: Add mir test for agpr constant reg_sequence handling --- ...i-fold-operands-agpr-copy-reg-sequence.mir | 559 ++ 1 file changed, 559 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir new file mode 100644 index 0..95112826b7112 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -0,0 +1,559 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-fold-operands -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s + +--- +name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 0 +%1:vgpr_32 = COPY killed %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 +%3:areg_128 = COPY %2 +$agpr0_agpr1_agpr2_agpr3 = COPY %3 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 
+ +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]] +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +S_NOP 0, implicit %1 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... 
+ +--- +name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 999 +%1:vgpr_32 = COPY %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subre
[llvm-branch-commits] [llvm] [AArch64] Fall back to SDAG for instructions with emulated TLS variables (PR #129076)
nikic wrote: This needs to go into the main branch first, before it can be considered for backport (to LLVM 20 only). I'll close this PR due to the mass-subscribe. https://github.com/llvm/llvm-project/pull/129076 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Fall back to SDAG for instructions with emulated TLS variables (PR #129076)
https://github.com/sschaller edited https://github.com/llvm/llvm-project/pull/129076 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Fall back to SDAG for instructions with emulated TLS variables (PR #129076)
https://github.com/nikic closed https://github.com/llvm/llvm-project/pull/129076 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Fall back to SDAG for instructions with emulated TLS variables (PR #129076)
sschaller wrote: Will do, sorry about that. https://github.com/llvm/llvm-project/pull/129076 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-format] Fix a bug that changes keyword `or` to an identifier (PR #128996)
https://github.com/owenca milestoned https://github.com/llvm/llvm-project/pull/128996 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Support -regalloc-npm options (PR #129035)
https://github.com/cdevadas edited https://github.com/llvm/llvm-project/pull/129035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Support -regalloc-npm options (PR #129035)
@@ -2,11 +2,17 @@ # RUN: llc -mtriple=amdgcn --passes='regallocfast,regallocfast,regallocfast' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS # RUN: not llc -mtriple=amdgcn --passes='regallocfast' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER +# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes %s | FileCheck %s --check-prefix=NPM-PASS + + # PASS: regallocfast # PASS: regallocfast # PASS: regallocfast # BAD-FILTER: invalid regallocfast register filter 'bad-filter' +# NPM-PASS: greedy +# NPM-PASS: regallocfast cdevadas wrote: Why is this option `no-clear-vregs` exposed to the command line? This was originally an internal flag to control the vreg clearing for targets requiring multiple regalloc pipelines. https://github.com/llvm/llvm-project/pull/129035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Support -regalloc-npm options (PR #129035)
https://github.com/cdevadas commented: LGTM. https://github.com/llvm/llvm-project/pull/129035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Support -regalloc-npm options (PR #129035)
@@ -2,11 +2,17 @@ # RUN: llc -mtriple=amdgcn --passes='regallocfast,regallocfast,regallocfast' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS # RUN: not llc -mtriple=amdgcn --passes='regallocfast' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER +# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes %s | FileCheck %s --check-prefix=NPM-PASS + + # PASS: regallocfast # PASS: regallocfast # PASS: regallocfast # BAD-FILTER: invalid regallocfast register filter 'bad-filter' +# NPM-PASS: greedy +# NPM-PASS: regallocfast arsenm wrote: How else would you test this part of the pipeline standalone? https://github.com/llvm/llvm-project/pull/129035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Support -regalloc-npm options (PR #129035)
@@ -2,11 +2,17 @@ # RUN: llc -mtriple=amdgcn --passes='regallocfast,regallocfast,regallocfast' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS # RUN: not llc -mtriple=amdgcn --passes='regallocfast' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER +# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes %s | FileCheck %s --check-prefix=NPM-PASS + + # PASS: regallocfast # PASS: regallocfast # PASS: regallocfast # BAD-FILTER: invalid regallocfast register filter 'bad-filter' +# NPM-PASS: greedy +# NPM-PASS: regallocfast cdevadas wrote: I don't remember seeing a command line option for doing it in the legacy path. So it's something new we're introducing in the NPM? https://github.com/llvm/llvm-project/pull/129035 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/129058 None >From 3fe0c486507705493e24b75b480469bda885b086 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 26 Feb 2025 22:59:31 +0700 Subject: [PATCH] AMDGPU: Add mir test for agpr constant reg_sequence handling --- ...i-fold-operands-agpr-copy-reg-sequence.mir | 559 ++ 1 file changed, 559 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir new file mode 100644 index 0..95112826b7112 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -0,0 +1,559 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-fold-operands -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s + +--- +name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 0 +%1:vgpr_32 = COPY killed %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 +%3:areg_128 = COPY %2 +$agpr0_agpr1_agpr2_agpr3 = COPY %3 +S_ENDPGM 0, implicit 
$agpr0_agpr1_agpr2_agpr3 + +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]] +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +S_NOP 0, implicit %1 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... 
+ +--- +name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 999 +%1:vgpr_32 = COPY %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subr
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/129059 This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. >From 12e4b8a7d60c043ff5fd442bdf629288d720d271 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 27 Feb 2025 20:40:52 +0700 Subject: [PATCH] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. 
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 ++--- ...si-fold-operands-agpr-copy-reg-sequence.mir | 18 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6cb6863068b5f..eb9aabf8b6317 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); } - auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp) - .addReg(Vgpr); + Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); + BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr); B.addReg(Tmp); } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir index 95112826b7112..493138c933686 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -206,11 +206,11 @@ body: | ; CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 @@ -232,10 +232,10 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec -; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[V_ACCVG
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129058?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#129059** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129059?utm_source=stack-comment-icon) * **#129058** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129058?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/129058?utm_source=stack-comment-view-in-graphite) * **#129052** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129052?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/129058 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/129059?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#129059** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129059?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/129059?utm_source=stack-comment-view-in-graphite) * **#129058** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129058?utm_source=stack-comment-icon) * **#129052** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/129052?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/129059 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 28.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129058.diff 1 Files Affected: - (added) llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir (+559) ``diff diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir new file mode 100644 index 0..95112826b7112 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -0,0 +1,559 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-fold-operands -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-fold-operands -o - %s | FileCheck %s + +--- +name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 0 +%1:vgpr_32 = COPY killed %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 +%3:areg_128 = COPY %2 +$agpr0_agpr1_agpr2_agpr3 = COPY %3 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... 
+ +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... + +--- +name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: v_mov_b32_0_vgpr_reg_sequence_128_splat_copy_to_agpr_multi_use +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]] +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +%1:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 +S_NOP 0, implicit %1 +%2:areg_128 = COPY killed %1 +$agpr0_agpr1_agpr2_agpr3 = COPY %2 +S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 + +... 
+ +--- +name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +tracksRegLiveness: true +body: | + bb.0: +; CHECK-LABEL: name: s_mov_b32_literal_copy_vgpr_reg_sequence_128_splat_copy_to_agpr +; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3 +; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[REG_SEQUENCE]] +; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[COPY]] +; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 +%0:sgpr_32 = S_MOV_B32 999 +%1:vgpr_32 = COPY %0 +%2:vreg_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 +%3:areg_128 = COPY %2 +$agpr0_agpr1_agpr2_agpr3 = COPY %3 +S_ENDPGM 0, implicit $agpr0_agpr1_ag
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was trying to hack around the intermediate VGPR requirement to copy to AGPRs on gfx908. We should still use a copy for all reg-to-reg cases. This should matter less these days, as we reserve a VGPR to handle it when required (and no end to end tests need updating). This was also an obstacle to handling this fold for input registers which are larger than 32-bits. --- Full diff: https://github.com/llvm/llvm-project/pull/129059.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir (+9-9) ``diff diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 6cb6863068b5f..eb9aabf8b6317 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1573,9 +1573,8 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const { Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def); } - auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); - BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp) - .addReg(Vgpr); + Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass); + BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), Tmp).addReg(Vgpr); B.addReg(Tmp); } diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir index 95112826b7112..493138c933686 100644 --- a/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir +++ b/llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir @@ -206,11 +206,11 @@ body: | ; CHECK-LABEL: name: s_mov_b32_999_splat_sgpr_128_copy_vgpr_copy_agpr ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 999 ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 @@ -232,10 +232,10 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 [[COPY]], implicit $exec -; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec -; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, 
[[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 +; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr_32 = COPY [[COPY]] +; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec +; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub3 ; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE]] ; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3 %0:sgpr_32 = S_MOV_B32 999 `` https://github.com/llvm/llvm-
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/129059 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add mir test for agpr constant reg_sequence handling (PR #129058)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/129058 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Stop introducing v_accvgpr_write_b32 for reg-to-reg copy (PR #129059)
https://github.com/Pierre-vh approved this pull request. https://github.com/llvm/llvm-project/pull/129059 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits