[llvm-branch-commits] [llvm] AMDGPU: Account for read/write register intrinsics for AGPR usage (PR #161988)
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/161988
Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.
>From e1402218d01f9269d181d69a289e80aa68d84c01 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 5 Oct 2025 10:17:18 +0900
Subject: [PATCH] AMDGPU: Account for read/write register intrinsics for AGPR
usage
Fix the special case intrinsics that can directly reference a physical
register. There's no reason to use this.
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 17 +++-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp| 15 +--
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 7 ++
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 99 +--
4 files changed, 120 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 155b1a6a380dd..e8e39ff015eed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1313,10 +1313,21 @@ struct AAAMDGPUNoAGPR : public
StateWrapper<BooleanState, AbstractAttribute> {
return false;
}
- // Some intrinsics may use AGPRs, but if we have a choice, we are not
- // required to use AGPRs.
- if (Callee->isIntrinsic())
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::write_register:
+ case Intrinsic::read_register:
+ case Intrinsic::read_volatile_register: {
+const MDString *RegName = cast<MDString>(
+cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
+auto [Kind, RegIdx, NumRegs] =
+AMDGPU::parseAsmPhysRegName(RegName->getString());
+return Kind != 'a';
+ }
+ default:
+// Some intrinsics may use AGPRs, but if we have a choice, we are not
+// required to use AGPRs.
return true;
+ }
// TODO: Handle callsite attributes
const auto *CalleeInfo = A.getAAFor(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 20fa1412a778e..8e57498c56f76 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1577,12 +1577,7 @@ static bool isValidRegPrefix(char C) {
return C == 'v' || C == 's' || C == 'a';
}
-std::tuple<char, unsigned, unsigned>
-parseAsmConstraintPhysReg(StringRef Constraint) {
- StringRef RegName = Constraint;
- if (!RegName.consume_front("{") || !RegName.consume_back("}"))
-return {};
-
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
char Kind = RegName.front();
if (!isValidRegPrefix(Kind))
return {};
@@ -1609,6 +1604,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) {
return {};
}
+std::tuple<char, unsigned, unsigned>
+parseAsmConstraintPhysReg(StringRef Constraint) {
+ StringRef RegName = Constraint;
+ if (!RegName.consume_front("{") || !RegName.consume_back("}"))
+return {};
+ return parseAsmPhysRegName(RegName);
+}
+
std::pair
getIntegerPairAttribute(const Function &F, StringRef Name,
std::pair Default,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2b9c063f42a5e..b656414f2b85c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV);
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);
+/// Returns a valid charcode or 0 in the first entry if this is a valid
physical
+/// register name. Followed by the start register number, and the register
+/// width. Does not validate the number of registers exists in the class.
Unlike
+/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
+/// "{}".
+std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
+
/// Returns a valid charcode or 0 in the first entry if this is a valid
physical
/// register constraint. Followed by the start register number, and the
register
/// width. Does not validate the number of registers exists in the class.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..7fcd07486d54d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -168,7 +168,7 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:call void @unknown()
; CHECK-NEXT:call void @use_most()
; CHECK-NEXT:ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHEC
[llvm-branch-commits] [llvm] AMDGPU: Stop inferring amdgpu-agpr-alloc on irrelevant targets (PR #161957)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes
This only matters for subtargets with configurable AGPR allocation.
---
Patch is 159.43 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/161957.diff
25 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+3-1)
- (modified) llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
(+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (+13-13)
- (modified) llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll (+9-9)
- (modified)
llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll (+18-20)
- (modified) llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+13-13)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
(+1-1)
- (modified)
llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (+21-21)
- (modified) llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll (+5-7)
- (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
(+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll (+2-2)
- (modified)
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (+2-2)
- (modified)
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
(+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
(+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll (+1-1)
``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index cb49936871e74..65d049ed9a0aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1504,7 +1504,6 @@ static bool runImpl(Module &M, AnalysisGetter &AG,
TargetMachine &TM,
A.getOrCreateAAFor(IRPosition::function(*F));
A.getOrCreateAAFor(IRPosition::function(*F));
A.getOrCreateAAFor(IRPosition::function(*F));
-A.getOrCreateAAFor(IRPosition::function(*F));
CallingConv::ID CC = F->getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
A.getOrCreateAAFor(IRPosition::function(*F));
@@ -1515,6 +1514,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG,
TargetMachine &TM,
if (!F->isDeclaration() && ST.hasClusters())
A.getOrCreateAAFor(IRPosition::function(*F));
+if (ST.hasGFX90AInsts())
+ A.getOrCreateAAFor(IRPosition::function(*F));
+
for (auto &I : instructions(F)) {
Value *Ptr = nullptr;
if (auto *LI = dyn_cast(&I))
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 2d7ef2c262157..98fbbe1a515ed 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn
memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0"
"amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z"
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id"
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr"
"amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id"
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x"
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x"
"amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
"uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0"
"amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z"
"amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id"
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr"
"amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id"
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x"
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-i
[llvm-branch-commits] [flang] [llvm] [mlir] [Flang][MLIR][OpenMP] Add explicit shared memory (de-)allocation ops (PR #161862)
llvmbot wrote:
@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-flang-codegen
Author: Sergio Afonso (skatrak)
Changes
This patch introduces the `omp.alloc_shared_mem` and `omp.free_shared_mem`
operations to represent explicit allocations and deallocations of shared memory
across threads in a team, mirroring the existing `omp.target_allocmem` and
`omp.target_freemem`.
The `omp.alloc_shared_mem` op goes through the same Flang-specific
transformations as `omp.target_allocmem`, so that the size of the buffer can be
properly calculated when translating to LLVM IR.
The corresponding runtime functions produced for these new operations are
`__kmpc_alloc_shared` and `__kmpc_free_shared`, which previously could only be
created for implicit allocations (e.g. privatized and reduction variables).
---
Patch is 21.51 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/161862.diff
8 Files Affected:
- (modified) flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp (+28-15)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+23)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+21-8)
- (modified) mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td (+62)
- (modified) mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp (+22)
- (modified)
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+55-13)
- (modified) mlir/test/Dialect/OpenMP/invalid.mlir (+28)
- (modified) mlir/test/Dialect/OpenMP/ops.mlir (+29-2)
``diff
diff --git a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
index 381b2a29c517a..c1a6b06d6a52b 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGenOpenMP.cpp
@@ -222,35 +222,47 @@ static mlir::Type convertObjectType(const
fir::LLVMTypeConverter &converter,
return converter.convertType(firType);
}
-// FIR Op specific conversion for TargetAllocMemOp
-struct TargetAllocMemOpConversion
-: public OpenMPFIROpConversion {
- using OpenMPFIROpConversion::OpenMPFIROpConversion;
+// FIR Op specific conversion for allocation operations
+template
+struct AllocMemOpConversion : public OpenMPFIROpConversion {
+ using OpenMPFIROpConversion::OpenMPFIROpConversion;
llvm::LogicalResult
- matchAndRewrite(mlir::omp::TargetAllocMemOp allocmemOp, OpAdaptor adaptor,
+ matchAndRewrite(T allocmemOp,
+ typename OpenMPFIROpConversion::OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
mlir::Type heapTy = allocmemOp.getAllocatedType();
mlir::Location loc = allocmemOp.getLoc();
-auto ity = lowerTy().indexType();
+auto ity = OpenMPFIROpConversion::lowerTy().indexType();
mlir::Type dataTy = fir::unwrapRefType(heapTy);
-mlir::Type llvmObjectTy = convertObjectType(lowerTy(), dataTy);
+mlir::Type llvmObjectTy =
+convertObjectType(OpenMPFIROpConversion::lowerTy(), dataTy);
if (fir::isRecordWithTypeParameters(fir::unwrapSequenceType(dataTy)))
- TODO(loc, "omp.target_allocmem codegen of derived type with length "
-"parameters");
+ TODO(loc, allocmemOp->getName().getStringRef() +
+" codegen of derived type with length parameters");
mlir::Value size = fir::computeElementDistance(
-loc, llvmObjectTy, ity, rewriter, lowerTy().getDataLayout());
+loc, llvmObjectTy, ity, rewriter,
+OpenMPFIROpConversion::lowerTy().getDataLayout());
if (auto scaleSize = fir::genAllocationScaleSize(
loc, allocmemOp.getInType(), ity, rewriter))
size = rewriter.create(loc, ity, size, scaleSize);
-for (mlir::Value opnd : adaptor.getOperands().drop_front())
+for (mlir::Value opnd : adaptor.getTypeparams())
+ size = rewriter.create(
+ loc, ity, size,
+ integerCast(OpenMPFIROpConversion::lowerTy(), loc, rewriter, ity,
+ opnd));
+for (mlir::Value opnd : adaptor.getShape())
size = rewriter.create(
- loc, ity, size, integerCast(lowerTy(), loc, rewriter, ity, opnd));
-auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+ loc, ity, size,
+ integerCast(OpenMPFIROpConversion::lowerTy(), loc, rewriter, ity,
+ opnd));
+auto mallocTyWidth =
+OpenMPFIROpConversion::lowerTy().getIndexTypeBitwidth();
auto mallocTy =
mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
if (mallocTyWidth != ity.getIntOrFloatBitWidth())
- size = integerCast(lowerTy(), loc, rewriter, mallocTy, size);
+ size = integerCast(OpenMPFIROpConversion::lowerTy(), loc, rewriter,
+ mallocTy, size);
rewriter.modifyOpInPlace(allocmemOp, [&]() {
allocmemOp.setInType(rewriter.getI8Type());
allocmemOp.getTypeparamsMutable().clear();
@@ -265,5 +277,6 @@ void fir::populateOpenMPFIRToLLVMConversionPatterns(
[llvm-branch-commits] [llvm] [Github] Add stefanp-synopsys to release uploaders (PR #148643)
tstellar wrote: release/20.x branch is frozen now so closing this. https://github.com/llvm/llvm-project/pull/148643 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Github] Add stefanp-synopsys to release uploaders (PR #148643)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/148643 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor exp2f16 implementation to header-only in src/__support/math folder. (PR #161993)
https://github.com/bassiounix created
https://github.com/llvm/llvm-project/pull/161993
None
>From 8bf93446dccaff1c82aa9a3a10ca54049176 Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Sun, 5 Oct 2025 06:48:10 +0300
Subject: [PATCH] [libc][math] Refactor exp2f16 implementation to header-only
in src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/exp2f16.h| 29 +
libc/src/__support/math/CMakeLists.txt| 14 +++
libc/src/__support/math/exp2f16.h | 111 ++
libc/src/math/generic/CMakeLists.txt | 10 +-
libc/src/math/generic/exp2f16.cpp | 86 +-
libc/test/shared/CMakeLists.txt | 1 +
libc/test/shared/shared_math_test.cpp | 2 +-
.../llvm-project-overlay/libc/BUILD.bazel | 18 ++-
9 files changed, 177 insertions(+), 95 deletions(-)
create mode 100644 libc/shared/math/exp2f16.h
create mode 100644 libc/src/__support/math/exp2f16.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 1262fa6f682d0..8bff70f1c5336 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -49,6 +49,7 @@
#include "math/exp10m1f16.h"
#include "math/exp2.h"
#include "math/exp2f.h"
+#include "math/exp2f16.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
diff --git a/libc/shared/math/exp2f16.h b/libc/shared/math/exp2f16.h
new file mode 100644
index 0..f799511efb0d7
--- /dev/null
+++ b/libc/shared/math/exp2f16.h
@@ -0,0 +1,29 @@
+//===-- Shared exp2f16 function -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP2F16_H
+#define LLVM_LIBC_SHARED_MATH_EXP2F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "shared/libc_common.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/math/exp2f16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp2f16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP2F16_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index 203ebb4bf1760..185900efa7354 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -753,6 +753,20 @@ add_header_library(
libc.src.errno.errno
)
+add_header_library(
+ exp2f16
+ HDRS
+exp2f16.h
+ DEPENDS
+.expxf16_utils
+libc.src.__support.FPUtil.cast
+libc.src.__support.FPUtil.except_value_utils
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.rounding_mode
+libc.src.__support.macros.optimization
+)
+
add_header_library(
exp10
HDRS
diff --git a/libc/src/__support/math/exp2f16.h
b/libc/src/__support/math/exp2f16.h
new file mode 100644
index 0..599ba0f5411bd
--- /dev/null
+++ b/libc/src/__support/math/exp2f16.h
@@ -0,0 +1,111 @@
+//===-- Implementation header for exp2f16 ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "expxf16_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float16 exp2f16(float16 x) {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ constexpr fputil::ExceptValues<float16, 3> EXP2F16_EXCEPTS = {{
+ // (input, RZ output, RU offset, RD offset, RN offset)
+ // x = 0x1.714p-11, exp2f16(x) = 0x1p+0 (RZ)
+ {0x11c5U, 0x3c00U, 1U, 0U, 1U},
+ // x = -0x1.558p-4, exp2f16(x) = 0x1.e34p-1 (RZ)
+ {0xad56U, 0x3b8dU, 1U, 0U, 0U},
+ // x = -0x1.d5cp-4, exp2f16(x) = 0x1.d8cp-1 (RZ)
+ {0xaf57U, 0x3b63U, 1U, 0U, 0U},
+ }};
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+ using namespace math::expxf16_internal;
+ using FPBits = fputil::FPBits<float16>;
+ FPBits x_bits(x);
+
+ uint16_t x_u = x_bits.uintval();
[llvm-branch-commits] [llvm] AMDGPU: Stop using the wavemask register class for SCC cross class copies (PR #161801)
https://github.com/rampitec approved this pull request. That's a scalar condition, so really does not depend on wave size. LGTM. https://github.com/llvm/llvm-project/pull/161801 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Remove LDS_DIRECT_CLASS register class (PR #161762)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/161762
>From 53a6a5b9e3adcabc51e7eff0a21642f33859b946 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 3 Oct 2025 10:21:10 +0900
Subject: [PATCH] AMDGPU: Remove LDS_DIRECT_CLASS register class
This is a singleton register class which is a bad idea,
and not actually used.
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 20 +-
.../GlobalISel/irtranslator-inline-asm.ll | 2 +-
.../coalesce-copy-to-agpr-to-av-registers.mir | 232 +-
...class-vgpr-mfma-to-av-with-load-source.mir | 12 +-
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll | 24 +-
...al-regcopy-and-spill-missed-at-regalloc.ll | 16 +-
...lloc-failure-overlapping-insert-assert.mir | 12 +-
.../rewrite-vgpr-mfma-to-agpr-copy-from.mir | 4 +-
...gpr-mfma-to-agpr-subreg-insert-extract.mir | 12 +-
...te-vgpr-mfma-to-agpr-subreg-src2-chain.mir | 32 +--
.../CodeGen/AMDGPU/spill-vector-superclass.ll | 2 +-
.../Inputs/amdgpu_isel.ll.expected| 4 +-
12 files changed, 183 insertions(+), 189 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index f98e31229b246..82fc2400a3754 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -761,12 +761,6 @@ def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU",
Reg128Types.types, 32,
let BaseClassOrder = 1;
}
-def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
- (add LDS_DIRECT)> {
- let isAllocatable = 0;
- let CopyCost = -1;
-}
-
let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
@@ -829,7 +823,7 @@ def SGPR_NULL256 : SIReg<"null">;
let GeneratePressureSet = 0 in {
def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16,
v2f16, v2bf16], 32,
- (add SReg_32, LDS_DIRECT_CLASS)> {
+ (add SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasSGPR = 1;
let Size = 32;
@@ -968,7 +962,7 @@ defm "" : SRegClass<32, Reg1024Types.types, SGPR_1024Regs>;
}
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16,
v2f16, v2bf16], 32,
- (add VGPR_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 32;
@@ -1083,21 +1077,21 @@ def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)>
{
}
def VS_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add VGPR_16, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_16, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 16;
}
def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add VGPR_16_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_16_Lo128, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 16;
}
def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16,
v2bf16], 32,
- (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
@@ -1105,7 +1099,7 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16,
f16, bf16, v2i16, v2f16, v
}
def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16,
v2f16, v2bf16], 32,
- (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32_Lo128, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
@@ -1113,7 +1107,7 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32,
i16, f16, bf16, v2i16, v2
}
def VS_32_Lo256 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16,
v2f16, v2bf16], 32,
- (add VGPR_32_Lo256, SReg_32,
LDS_DIRECT_CLASS)> {
+ (add VGPR_32_Lo256, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index a54dc9dda16e0..e5cd0710359ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 {
define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /*
attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 3473418 /* regdef:VReg_64
*/, def %9
+
[llvm-branch-commits] [llvm] TableGen: Support target specialized pseudoinstructions (PR #159880)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/159880
>From 74bae94efb361527a6ad5db59e127014dc0c65c3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 15 Sep 2025 22:41:07 +0900
Subject: [PATCH] TableGen: Support target specialized pseudoinstructions
Allow a target to steal the definition of a generic pseudoinstruction
and remap the operands. This works by defining a new instruction, which
will simply swap out the emitted entry in the InstrInfo table.
This is intended to eliminate the C++ half of the implementation
of PointerLikeRegClass. With RegClassByHwMode, the remaining usecase
for PointerLikeRegClass are the common codegen pseudoinstructions.
Every target maintains its own copy of the generic pseudo operand
definitions anyway, so we can stub out the register operands with
an appropriate class instead of waiting for runtime resolution.
In the future we could probably take this a bit further. For example,
there is a similar problem for ADJCALLSTACKUP/DOWN since they depend
on target register definitions for the stack pointer register.
---
llvm/include/llvm/Target/Target.td| 93
.../TableGen/target-specialized-pseudos.td| 101 ++
llvm/utils/TableGen/Common/CodeGenTarget.cpp | 12 ++-
llvm/utils/TableGen/InstrInfoEmitter.cpp | 37 +++
4 files changed, 242 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/TableGen/target-specialized-pseudos.td
diff --git a/llvm/include/llvm/Target/Target.td
b/llvm/include/llvm/Target/Target.td
index 13175177edd3e..4a759a99d1d25 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1574,6 +1574,99 @@ def CONVERGENCECTRL_GLUE : StandardPseudoInstruction {
}
}
+/// Allow a target to replace the instruction definition of a
+/// StandardPseudoInstruction. A target should only define one
+/// instance of this per instruction.
+///
+/// This is intended to allow targets to specify the register class
+/// used for pointers. It should not be used to change the fundamental
+/// operand structure (e.g., this should not add or remove operands,
+/// or change the operand types).
+class TargetSpecializedStandardPseudoInstruction<
+ StandardPseudoInstruction base_inst> : Instruction {
+
+ StandardPseudoInstruction Instruction = base_inst;
+ let OutOperandList = base_inst.OutOperandList;
+ let InOperandList = base_inst.InOperandList;
+
+ // TODO: Copy everything
+ let usesCustomInserter = base_inst.usesCustomInserter;
+ let hasSideEffects = base_inst.hasSideEffects;
+ let mayLoad = base_inst.mayLoad;
+ let mayStore = base_inst.mayStore;
+ let isTerminator = base_inst.isTerminator;
+ let isBranch = base_inst.isBranch;
+ let isIndirectBranch = base_inst.isIndirectBranch;
+ let isEHScopeReturn = base_inst.isEHScopeReturn;
+ let isReturn = base_inst.isReturn;
+ let isCall = base_inst.isCall;
+ let hasCtrlDep = base_inst.hasCtrlDep;
+ let isReMaterializable = base_inst.isReMaterializable;
+ let isMeta = base_inst.isMeta;
+ let Size = base_inst.Size;
+ let isAsCheapAsAMove = base_inst.isAsCheapAsAMove;
+ let isPseudo = true;
+ let hasNoSchedulingInfo = true;
+ let isNotDuplicable = base_inst.isNotDuplicable;
+ let isConvergent = base_inst.isConvergent;
+ let hasExtraSrcRegAllocReq = base_inst.hasExtraSrcRegAllocReq;
+ let hasExtraDefRegAllocReq = base_inst.hasExtraDefRegAllocReq;
+}
+
+// All pseudo instructions which need a pointer register class, which
+// should be specialized by a target.
+defvar PseudosWithPtrOps = [
+ LOAD_STACK_GUARD,
+ PREALLOCATED_ARG,
+ PATCHABLE_EVENT_CALL,
+ PATCHABLE_TYPED_EVENT_CALL
+];
+
+
+/// Replace PointerLikeRegClass operands in OperandList with new_rc.
+class RemapPointerOperandList {
+ // Collect the set of names so we can query and rewrite them.
+ list op_names = !foreach(i, !range(!size(OperandList)),
+ !getdagname(OperandList, i));
+
+ // Beautiful language. This would be a lot easier if !getdagarg
+ // didn't require a specific type. We can't just collect a list of
+ // the operand values and reconstruct the dag, since there isn't a
+ // common base class for all the field kinds used in
+ // pseudoinstruction definitions; therefore everything must be
+ // maintained as a dag, so use a foldl. Additionally, ? doesn't
+ // evaluate as false so we get even more noise.
+ dag ret =
+!foldl(OperandList, op_names, acc, name,
+ !cond(
+!initialized(!getdagarg(OperandList, name))
+ : !setdagarg(acc, name, new_rc),
+!initialized(!getdagarg(OperandList, name)) : acc,
+!initialized(!getdagarg(OperandList, name)) : acc
+ )
+);
+}
+
+/// Define an override for a pseudoinstruction which uses a pointer
+/// register class, specialized to the target's pointer type.
+class RemapPointerOperands :
+ TargetSpecializedStandardPseudoInstruction {
+ let OutOperandList =
+RemapPointerOper
[llvm-branch-commits] [llvm] CodeGen: Make target overrides of PointerLikeRegClass mandatory (PR #159882)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/159882
>From a8f90376a5de997c3361afe8a87b28a10c6a40a5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 16 Sep 2025 14:54:15 +0900
Subject: [PATCH] CodeGen: Make target overrides of PointerLikeRegClass
mandatory
Most targets should now use the convenience multiclass to fixup
the operand definitions of pointer-using pseudoinstructions:
defm : RemapAllTargetPseudoPointerOperands;
---
llvm/test/TableGen/DuplicateFieldValues.td| 1 +
llvm/test/TableGen/RegClassByHwMode.td| 15 +++-
llvm/test/TableGen/def-multiple-operands.td | 2 ++
llvm/test/TableGen/get-named-operand-idx.td | 2 ++
.../TableGen/get-operand-type-no-expand.td| 2 ++
llvm/test/TableGen/get-operand-type.td| 2 ++
.../TableGen/target-specialized-pseudos.td| 34 ++-
llvm/utils/TableGen/InstrInfoEmitter.cpp | 18 --
8 files changed, 64 insertions(+), 12 deletions(-)
diff --git a/llvm/test/TableGen/DuplicateFieldValues.td
b/llvm/test/TableGen/DuplicateFieldValues.td
index 50c77fa88ccec..85cb5bbfb6c56 100644
--- a/llvm/test/TableGen/DuplicateFieldValues.td
+++ b/llvm/test/TableGen/DuplicateFieldValues.td
@@ -82,3 +82,4 @@ let BaseName = "0" in {
def E0 : I, ABCRel, isEForm;
}
+defm : RemapAllTargetPseudoPointerOperands;
diff --git a/llvm/test/TableGen/RegClassByHwMode.td
b/llvm/test/TableGen/RegClassByHwMode.td
index ca72cfbd403bf..32fa9c17c60be 100644
--- a/llvm/test/TableGen/RegClassByHwMode.td
+++ b/llvm/test/TableGen/RegClassByHwMode.td
@@ -11,6 +11,7 @@ include "llvm/Target/Target.td"
// INSTRINFO-NEXT: namespace llvm::MyTarget {
// INSTRINFO-NEXT: enum {
// INSTRINFO-NEXT: PHI
+// INSTRINFO: LOAD_STACK_GUARD = [[LOAD_STACK_GUARD_OPCODE:[0-9]+]]
// INSTRINFO: };
// INSTRINFO: enum RegClassByHwModeUses : uint16_t {
// INSTRINFO-NEXT: MyPtrRC,
@@ -19,10 +20,20 @@ include "llvm/Target/Target.td"
// INSTRINFO-NEXT: };
// INSTRINFO-NEXT: }
+
+// INSTRINFO: { [[LOAD_STACK_GUARD_OPCODE]], 1, 1, 0, 0,
0, 0, [[LOAD_STACK_GUARD_OP_INDEX:[0-9]+]], MyTargetImpOpBase + 0,
0|(1ULL<;
+defm : RemapAllTargetPseudoPointerOperands;
+
def MyTargetISA : InstrInfo;
def MyTarget : Target { let InstructionSet = MyTargetISA; }
diff --git a/llvm/test/TableGen/def-multiple-operands.td
b/llvm/test/TableGen/def-multiple-operands.td
index 5d215056920e8..dc5ea09eff9ba 100644
--- a/llvm/test/TableGen/def-multiple-operands.td
+++ b/llvm/test/TableGen/def-multiple-operands.td
@@ -35,3 +35,5 @@ def InstA : Instruction {
field bits<8> SoftFail = 0;
let hasSideEffects = false;
}
+
+defm : RemapAllTargetPseudoPointerOperands;
diff --git a/llvm/test/TableGen/get-named-operand-idx.td
b/llvm/test/TableGen/get-named-operand-idx.td
index e6f6331cd9c48..4626fb6439edd 100644
--- a/llvm/test/TableGen/get-named-operand-idx.td
+++ b/llvm/test/TableGen/get-named-operand-idx.td
@@ -48,6 +48,8 @@ def InstD : InstBase {
let UseNamedOperandTable = 0;
}
+defm : RemapAllTargetPseudoPointerOperands;
+
// CHECK-LABEL: #ifdef GET_INSTRINFO_OPERAND_ENUM
// CHECK-NEXT: #undef GET_INSTRINFO_OPERAND_ENUM
// CHECK-NEXT: namespace llvm::MyNamespace {
diff --git a/llvm/test/TableGen/get-operand-type-no-expand.td
b/llvm/test/TableGen/get-operand-type-no-expand.td
index a0a8fa957f9b6..fcaf3684528b2 100644
--- a/llvm/test/TableGen/get-operand-type-no-expand.td
+++ b/llvm/test/TableGen/get-operand-type-no-expand.td
@@ -46,3 +46,5 @@ def InstA : Instruction {
// CHECK-NOEXPAND:/* InstA */
// CHECK-NOEXPAND-NEXT: i512complex, i8complex, i32imm,
// CHECK-NOEXPAND: #endif // GET_INSTRINFO_OPERAND_TYPE
+
+defm : RemapAllTargetPseudoPointerOperands;
diff --git a/llvm/test/TableGen/get-operand-type.td
b/llvm/test/TableGen/get-operand-type.td
index b2f63cafd6a89..49fbb63ac5974 100644
--- a/llvm/test/TableGen/get-operand-type.td
+++ b/llvm/test/TableGen/get-operand-type.td
@@ -18,6 +18,8 @@ def OpB : Operand;
def RegOp : RegisterOperand;
+defm : RemapAllTargetPseudoPointerOperands;
+
def InstA : Instruction {
let Size = 1;
let OutOperandList = (outs OpA:$a);
diff --git a/llvm/test/TableGen/target-specialized-pseudos.td
b/llvm/test/TableGen/target-specialized-pseudos.td
index 99c63f3ec29d9..3953a36101fe0 100644
--- a/llvm/test/TableGen/target-specialized-pseudos.td
+++ b/llvm/test/TableGen/target-specialized-pseudos.td
@@ -1,6 +1,11 @@
-// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s -DONECASE -o - |
FileCheck -check-prefixes=CHECK,ONECASE %s
// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s -DALLCASES -o - |
FileCheck -check-prefixes=CHECK,ALLCASES %s
-// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DERROR -o
/dev/null 2>&1 | FileCheck -check-prefix=ERROR %s
+// RUN: not llvm-tblgen -gen-instr-info -I %p/../../include %s -DONECASE -o
/dev/null 2>&1 | FileCheck -check-prefixes=ERROR-MISSING %s
// RUN: not llvm-tblgen
[llvm-branch-commits] [llvm] CodeGen: Make all targets override pseudos with pointers (PR #159881)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/159881
>From 64f0e18b0875e636705e09541ca99c6665ea19b7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 16 Sep 2025 14:55:52 +0900
Subject: [PATCH] CodeGen: Make all targets override pseudos with pointers
This eliminates the need to have PointerLikeRegClass handling in
codegen.
---
llvm/lib/Target/AArch64/AArch64.td | 2 ++
llvm/lib/Target/AMDGPU/R600.td | 21 -
llvm/lib/Target/AMDGPU/SIInstructions.td | 11 +++
llvm/lib/Target/ARM/ARM.td | 8
llvm/lib/Target/AVR/AVR.td | 2 ++
llvm/lib/Target/BPF/BPF.td | 3 +++
llvm/lib/Target/CSKY/CSKY.td | 2 ++
llvm/lib/Target/DirectX/DirectX.td | 2 ++
llvm/lib/Target/Hexagon/Hexagon.td | 2 ++
llvm/lib/Target/Lanai/Lanai.td | 2 ++
llvm/lib/Target/LoongArch/LoongArch.td | 2 ++
llvm/lib/Target/M68k/M68k.td | 2 ++
llvm/lib/Target/MSP430/MSP430.td | 2 ++
llvm/lib/Target/Mips/Mips.td | 2 ++
llvm/lib/Target/NVPTX/NVPTX.td | 10 ++
llvm/lib/Target/PowerPC/PPC.td | 2 ++
llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 4
llvm/lib/Target/RISCV/RISCV.td | 2 ++
llvm/lib/Target/SPIRV/SPIRV.td | 2 ++
llvm/lib/Target/Sparc/Sparc.td | 2 ++
llvm/lib/Target/SystemZ/SystemZ.td | 4 +++-
llvm/lib/Target/VE/VE.td | 1 +
llvm/lib/Target/WebAssembly/WebAssembly.td | 8
llvm/lib/Target/X86/X86.td | 2 ++
llvm/lib/Target/XCore/XCore.td | 2 ++
llvm/lib/Target/Xtensa/Xtensa.td | 2 ++
26 files changed, 94 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64.td
b/llvm/lib/Target/AArch64/AArch64.td
index 86f95488e6bb7..d98c235dab15e 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -40,6 +40,8 @@ include "AArch64SchedPredExynos.td"
include "AArch64SchedPredNeoverse.td"
include "AArch64Combine.td"
+defm : RemapAllTargetPseudoPointerOperands;
+
def AArch64InstrInfo : InstrInfo;
//===--===//
diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td
index 9148edb92b084..bdfaac9f42ea7 100644
--- a/llvm/lib/Target/AMDGPU/R600.td
+++ b/llvm/lib/Target/AMDGPU/R600.td
@@ -8,15 +8,6 @@
include "llvm/Target/Target.td"
-def R600InstrInfo : InstrInfo {
- let guessInstructionProperties = 1;
-}
-
-def R600 : Target {
- let InstructionSet = R600InstrInfo;
- let AllowRegisterRenaming = 1;
-}
-
let Namespace = "R600" in {
foreach Index = 0-15 in {
@@ -27,6 +18,18 @@ include "R600RegisterInfo.td"
}
+defm : RemapAllTargetPseudoPointerOperands;
+
+def R600InstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+}
+
+def R600 : Target {
+ let InstructionSet = R600InstrInfo;
+ let AllowRegisterRenaming = 1;
+}
+
+
def NullALU : InstrItinClass;
def ALU_NULL : FuncUnit;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index eac9fd44cd9a6..457dfb4613219 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4732,3 +4732,14 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins),
"v_illegal"> {
let hasSideEffects = 1;
let SubtargetPredicate = isGFX10Plus;
}
+
+defvar VGPR32_Ptr_Opcodes = [LOAD_STACK_GUARD];
+defvar VGPR64_Ptr_Opcodes = !listremove(PseudosWithPtrOps, VGPR32_Ptr_Opcodes);
+
+foreach inst = VGPR32_Ptr_Opcodes in {
+ def : RemapPointerOperands;
+}
+
+foreach inst = VGPR64_Ptr_Opcodes in {
+ def : RemapPointerOperands;
+}
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 570aae9b3c7a7..1f71d810983db 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -38,6 +38,14 @@ include "ARMSchedule.td"
//===--===//
include "ARMInstrInfo.td"
+
+def Thumb1OnlyMode : HwMode<[IsThumb1Only]>;
+def arm_ptr_rc : RegClassByHwMode<
+ [DefaultMode, Thumb1OnlyMode],
+ [GPR, tGPR]>;
+
+defm : RemapAllTargetPseudoPointerOperands;
+
def ARMInstrInfo : InstrInfo;
//===--===//
diff --git a/llvm/lib/Target/AVR/AVR.td b/llvm/lib/Target/AVR/AVR.td
index 22ffc4a368ad6..f4ee11984cb73 100644
--- a/llvm/lib/Target/AVR/AVR.td
+++ b/llvm/lib/Target/AVR/AVR.td
@@ -32,6 +32,8 @@ include "AVRRegisterInfo.td"
include "AVRInstrInfo.td"
+defm : RemapAllTargetPseudoPointerOperands;
+
def AVRInstrInfo : InstrInfo;
//===-===//
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index dff76ca07af51..399be731b44f6 100644
--- a/llvm/lib/
[llvm-branch-commits] [llvm] AMDGPU: Stop using aligned VGPR classes for addRegisterClass (PR #158278)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158278
>From bad7300404b215bc299b68524cd666f6752925c7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 12 Sep 2025 20:45:56 +0900
Subject: [PATCH] AMDGPU: Stop using aligned VGPR classes for addRegisterClass
This is unnecessary. At use emission time, InstrEmitter will
use the common subclass of the value type's register class and
the use instruction register classes. This removes one of the
obstacles to treating special case instructions that do not have
the alignment requirement overly conservatively.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32 +++
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 14 +-
2 files changed, 24 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e2334577884b7..750e753c9de0e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -103,52 +103,52 @@ SITargetLowering::SITargetLowering(const TargetMachine
&TM,
addRegisterClass(MVT::Untyped, V64RegClass);
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
- addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
+ addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
- addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
- addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
+ addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
- addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
+ addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
- addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
+ addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
- addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
+ addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
- addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
+ addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
- addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
+ addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
- addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
if (Subtarget->has16BitInsts()) {
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
- addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 0af655dfbbee9..4bb653848cbf0 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -2399,8 +2399,9 @@ define amdgpu_kernel void
@test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
; GFX90A-NEXT:v_accvgpr_mov_b32 a29, a0
; GFX90A-NEXT:v_accvgpr_mov_b32 a30, a0
; GFX90A-NEXT
[llvm-branch-commits] [llvm] AMDGPU: Remove wrapper around TRI::getRegClass (PR #159885)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/159885
>From bcaa5f77561898d1c0a83e7be20e5673b0932781 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 17 Sep 2025 21:14:02 +0900
Subject: [PATCH] AMDGPU: Remove wrapper around TRI::getRegClass
This shadows the member in the base class, but differs slightly
in behavior. The base method doesn't check for the invalid case.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 7 ---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 5 +++--
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 11 ---
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 --
4 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 6616b30410590..44ab7715ca981 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1309,10 +1309,11 @@ void SIFoldOperandsImpl::foldOperand(
continue;
const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
- const TargetRegisterClass *MovSrcRC =
- TRI->getRegClass(TII->getOpRegClassID(MovDesc.operands()[SrcIdx]));
- if (MovSrcRC) {
+ int16_t RegClassID = TII->getOpRegClassID(MovDesc.operands()[SrcIdx]);
+ if (RegClassID != -1) {
+const TargetRegisterClass *MovSrcRC = TRI->getRegClass(RegClassID);
+
if (UseSubReg)
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ec5c5bb349ac4..f2ad8886b3871 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6041,7 +6041,7 @@ SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned
OpNum,
return nullptr;
const MCOperandInfo &OpInfo = TID.operands()[OpNum];
int16_t RegClass = getOpRegClassID(OpInfo);
- return RI.getRegClass(RegClass);
+ return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
}
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
@@ -6059,7 +6059,8 @@ const TargetRegisterClass
*SIInstrInfo::getOpRegClass(const MachineInstr &MI,
return RI.getPhysRegBaseClass(Reg);
}
- return RI.getRegClass(getOpRegClassID(Desc.operands()[OpNo]));
+ int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
+ return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
}
void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 48cae6c868205..42c88360bd0ac 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3893,17 +3893,6 @@ const TargetRegisterClass
*SIRegisterInfo::getVGPR64Class() const {
: &AMDGPU::VReg_64RegClass;
}
-// FIXME: This should be deleted
-const TargetRegisterClass *
-SIRegisterInfo::getRegClass(unsigned RCID) const {
- switch ((int)RCID) {
- case -1:
-return nullptr;
- default:
-return AMDGPUGenRegisterInfo::getRegClass(RCID);
- }
-}
-
// Find reaching register definition
MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
MachineInstr &Use,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 7b91ba7bc581f..813f6bb1a503a 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -391,8 +391,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
MCRegister getExec() const;
- const TargetRegisterClass *getRegClass(unsigned RCID) const;
-
// Find reaching register definition
MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
MachineInstr &Use,
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Stop using aligned VGPR classes for addRegisterClass (PR #158278)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/158278
>From bad7300404b215bc299b68524cd666f6752925c7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 12 Sep 2025 20:45:56 +0900
Subject: [PATCH] AMDGPU: Stop using aligned VGPR classes for addRegisterClass
This is unnecessary. At use emission time, InstrEmitter will
use the common subclass of the value type's register class and
the use instruction register classes. This removes one of the
obstacles to treating special case instructions that do not have
the alignment requirement overly conservatively.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 32 +++
llvm/test/CodeGen/AMDGPU/mfma-loop.ll | 14 +-
2 files changed, 24 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e2334577884b7..750e753c9de0e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -103,52 +103,52 @@ SITargetLowering::SITargetLowering(const TargetMachine
&TM,
addRegisterClass(MVT::Untyped, V64RegClass);
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
- addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
+ addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
- addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
+ addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
- addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
+ addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
- addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
+ addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
- addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
+ addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
- addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
+ addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
- addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
+ addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
- addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
+ addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
- addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
+ addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
- addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
+ addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
- addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
+ addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
- addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
if (Subtarget->has16BitInsts()) {
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
- addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
+ addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index 0af655dfbbee9..4bb653848cbf0 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -2399,8 +2399,9 @@ define amdgpu_kernel void
@test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
; GFX90A-NEXT:v_accvgpr_mov_b32 a29, a0
; GFX90A-NEXT:v_accvgpr_mov_b32 a30, a0
; GFX90A-NEXT
