@@ -2038,15 +2038,15 @@ multiclass F_ATOMIC_2_AS,
preds>;
defm _S : F_ATOMIC_2,
preds>;
- defm _DS : F_ATOMIC_2, !listconcat([hasSM<80>], preds)>;
+ defm _S_C : F_ATOMIC_2, !listconcat([hasSM<80>], preds)>;
AlexMaclean wrote:
The PTX doc seems to say this
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 5
+; RUN: llc < %s -o - -mcpu=sm_90 -march=nvptx64 -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80|
%pt
@@ -137,6 +137,7 @@ def hasAtomBitwise64 :
Predicate<"Subtarget->hasAtomBitwise64()">;
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
+def hasClusters : Pred
https://github.com/AlexMaclean edited
https://github.com/llvm/llvm-project/pull/135444
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 5
+; RUN: llc < %s -o - -mcpu=sm_90 -march=nvptx64 -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80|
%pt
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 5
+; RUN: llc < %s -o - -mcpu=sm_90 -march=nvptx64 -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80|
%pt
https://github.com/AlexMaclean commented:
Backend changes look reasonable so far. One concern I have with this change is
that until now we've assumed specific address-spaces are non-overlapping.
You've addressed some of the places where this assumption is encoded but I
think there are others y
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/135644
>From fd11c2b4c964a3fe336e3fcb106fca5bf9c7d2b2 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Fri, 11 Apr 2025 17:59:50 +0000
Subject: [PATCH 1/4] [NVPTX] Cleaup and document nvvm.fabs intrinsics, adding
@@ -1034,6 +1034,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned
BuiltinID,
case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
+ case NVPTX::BI__nvvm_abs_bf16
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/135644
>From fd11c2b4c964a3fe336e3fcb106fca5bf9c7d2b2 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Fri, 11 Apr 2025 17:59:50 +0000
Subject: [PATCH 1/5] [NVPTX] Cleaup and document nvvm.fabs intrinsics, adding
@@ -309,6 +309,60 @@ space casted to this space), 1 is returned, otherwise 0 is
returned.
Arithmetic Intrinsics
-
+'``llvm.nvvm.fabs.*``' Intrinsic
+
+
+Syntax:
+"""
+
+.. code-block:: llvm
+
+declare float @llvm.nvv
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/135644
>From fd11c2b4c964a3fe336e3fcb106fca5bf9c7d2b2 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Fri, 11 Apr 2025 17:59:50 +0000
Subject: [PATCH 1/6] [NVPTX] Cleaup and document nvvm.fabs intrinsics, adding
@@ -1034,6 +1034,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned
BuiltinID,
case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
+ case NVPTX::BI__nvvm_abs_bf16
@@ -411,6 +412,13 @@ static Instruction
*convertNvvmIntrinsicToLlvm(InstCombiner &IC,
}
return nullptr;
}
+ case SPC_Fabs: {
+if (!II->getType()->isDoubleTy())
+ return nullptr;
+auto *Fabs = Intrinsic::getOrInsertDeclaration(
+II->getModule(),
@@ -1034,6 +1034,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned
BuiltinID,
case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
+ case NVPTX::BI__nvvm_abs_bf16
@@ -982,8 +982,9 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
case ADDRESS_SPACE_SHARED:
Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared;
break;
-case ADDRESS_SPACE_DSHARED:
- Opc = TM.is64Bit() ? NVPTX::cvta_dshared_64 :
@@ -426,10 +426,7 @@ static std::optional evaluateIsSpace(Intrinsic::ID
IID, unsigned AS) {
case Intrinsic::nvvm_isspacep_shared:
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
AlexMaclean wrote:
If the address space is `ADDRESS_SPACE_SHARED_CLUSTER` this i
@@ -25,6 +25,7 @@ enum AddressSpace : unsigned {
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
ADDRESS_SPACE_TENSOR = 6,
+ ADDRESS_SPACE_SHARED_CLUSTER = 7,
AlexMaclean wrote:
I think it would be good to rename `ADDRESS_SPACE_SHARED` to
`ADDRESS_SP
@@ -3019,8 +3019,26 @@ SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue
Op,
unsigned SrcAS = N->getSrcAddressSpace();
unsigned DestAS = N->getDestAddressSpace();
if (SrcAS != llvm::ADDRESS_SPACE_GENERIC &&
- DestAS != llvm::ADDRESS_SPACE_GENERIC)
+ DestA
@@ -426,10 +426,7 @@ static std::optional evaluateIsSpace(Intrinsic::ID
IID, unsigned AS) {
case Intrinsic::nvvm_isspacep_shared:
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
case Intrinsic::nvvm_isspacep_shared_cluster:
-// We can't tell shared from shared_cluster
@@ -176,6 +176,7 @@ enum AddressSpace : AddressSpaceUnderlyingType {
Shared = 3,
AlexMaclean wrote:
Let's rename this to `SharedCTA` as well.
https://github.com/llvm/llvm-project/pull/135444
___
cfe-commits mailing l
@@ -3019,8 +3019,26 @@ SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue
Op,
unsigned SrcAS = N->getSrcAddressSpace();
unsigned DestAS = N->getDestAddressSpace();
if (SrcAS != llvm::ADDRESS_SPACE_GENERIC &&
- DestAS != llvm::ADDRESS_SPACE_GENERIC)
+ DestA
@@ -2381,25 +2387,38 @@ def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"u32",
Int32Regs>;
def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"f32", Float32Regs>;
-multiclass NG_TO_G {
- def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "cvta." # Str # ".u
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/127736
>From bae09a6c197306efce76080921843f2b67a6333c Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Wed, 19 Feb 2025 02:26:23 +0000
Subject: [PATCH] [NVPTX] Convert vector function nvvm.annotations to
attribut
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/127736
>From fd8f342fa2b65f7604955c88e2b73e758dc17134 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Wed, 19 Feb 2025 02:26:23 +0000
Subject: [PATCH] [NVPTX] Convert vector function nvvm.annotations to
attribut
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/127736
>From fd8f342fa2b65f7604955c88e2b73e758dc17134 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Wed, 19 Feb 2025 02:26:23 +0000
Subject: [PATCH 1/2] [NVPTX] Convert vector function nvvm.annotations to
attr
@@ -506,24 +507,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const
Function &F,
// If the NVVM IR has some of reqntid* specified, then output
// the reqntid directive, and set the unspecified ones to 1.
// If none of Reqntid* is specified, don't output reqnti
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
@@ -5059,6 +5092,18 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV,
StringRef K,
cast(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
return true;
}
+ if (K.consume_front("maxntid") && (K == "x" || K == "y" || K == "z")) {
AlexMaclean
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const
Function &F,
: std::nullopt;
}
+static SmallVector getFnAttrParsedVector(const Function &F,
+ StringRef Attr) {
+ SmallVector V;
+ auto &Ctx
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const
Function &F,
: std::nullopt;
}
+static SmallVector getFnAttrParsedVector(const Function &F,
+ StringRef Attr) {
+ SmallVector V;
+ auto &Ctx
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/127736
>From b637f2a9142aa9493e78f8d6e05b692b7175c123 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Wed, 19 Feb 2025 02:26:23 +0000
Subject: [PATCH 1/3] [NVPTX] Convert vector function nvvm.annotations to
attr
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/127736
>From 5ca8b82e146439453b51f990e4ed43f8bd2838eb Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Wed, 19 Feb 2025 02:26:23 +0000
Subject: [PATCH 1/3] [NVPTX] Convert vector function nvvm.annotations to
attr
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) {
return Modified;
}
+static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
+GlobalValue *GV, const Metadata *V) {
+ Function *F = cast(GV);
+
+ constexpr
AlexMaclean wrote:
@hanhanW, @akuegel Heads up, if you're using any of these annotations, I expect
you'll need to update your respective out-of-tree frontends once this change
lands (similar to https://github.com/llvm/llvm-project/pull/119261).
Here's an example of what that might look like:
AlexMaclean wrote:
> I think they will become something like:
>
> ```c++
> llvmFunc->addFnAttr("nvvm.maxntid", llvm::utostr(workgroupSize[0]));
> llvmFunc->addFnAttr("nvvm.maxntid", llvm::utostr(workgroupSize[1]));
> llvmFunc->addFnAttr("nvvm.maxntid", llvm::utostr(workgroupSize[2]));
> ```
Not
@@ -2070,8 +2070,8 @@ defm INT_PTX_ATOMIC_UMIN_32 : F_ATOMIC_2_AS]>;
// atom_inc atom_dec
AlexMaclean wrote:
I think it makes sense to test the auto-upgrade rules and test the lowering of
the current syntax but not to maintain lowering tests using out-of-dat
@@ -2314,6 +2317,12 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name,
CallBase *CI,
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsi
https://github.com/AlexMaclean closed
https://github.com/llvm/llvm-project/pull/137698
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
AlexMaclean wrote:
> This merge broke our builds on Halide.
>
> ```
> Unhandled exception: Error: Could not find PTX barrier intrinsic
> (llvm.nvvm.barrier0)
> ```
>
> We have [an `.ll`
> file](https://github.com/halide/Halide/blob/main/src/runtime/ptx_dev.ll)
> declaring these intrinsics:
>
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/141143
>From a46075f9aa3970735104cbcf2503ebef89db Mon Sep 17 00:00:00 2001
From: Alex MacLean
Date: Wed, 21 May 2025 08:14:15 -0700
Subject: [PATCH 1/2] [NVPTX] Unify and extend barrier{.cta} intrinsic support
https://github.com/AlexMaclean closed
https://github.com/llvm/llvm-project/pull/140615
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/140615
>From babb28ef1c935f0d0cfb3b40f62be860be027010 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 15 May 2025 18:12:11 +0000
Subject: [PATCH 1/5] [NVPTX] Unify and extend barrier{.cta} intrinsic support
@@ -240,6 +240,47 @@ def BF16RT : RegTyInfo;
def F16X2RT : RegTyInfo;
def BF16X2RT : RegTyInfo;
+// This class provides a basic wrapper around an NVPTXInst that abstracts the
+// specific syntax of most PTX instructions. It automatically handles the
+// construction of the
@@ -462,24 +462,28 @@ def NVVM_MBarrierTestWaitSharedOp :
NVVM_Op<"mbarrier.test.wait.shared">,
// NVVM synchronization op definitions
//===--===//
-def NVVM_Barrier0Op : NVVM_IntrOp<"barrier0"> {
+def NVVM_
@@ -462,24 +462,28 @@ def NVVM_MBarrierTestWaitSharedOp :
NVVM_Op<"mbarrier.test.wait.shared">,
// NVVM synchronization op definitions
//===--===//
-def NVVM_Barrier0Op : NVVM_IntrOp<"barrier0"> {
+def NVVM_
@@ -462,24 +462,28 @@ def NVVM_MBarrierTestWaitSharedOp :
NVVM_Op<"mbarrier.test.wait.shared">,
// NVVM synchronization op definitions
//===--===//
-def NVVM_Barrier0Op : NVVM_IntrOp<"barrier0"> {
+def NVVM_
@@ -199,21 +199,58 @@ map in the following way to CUDA builtins:
Barriers
-'``llvm.nvvm.barrier0``'
-^^^
+'``llvm.nvvm.barrier.cta.*``'
+^
Syntax:
"""
.. code-block:: llvm
- declare void @llvm.nvvm.barr
@@ -71,14 +71,6 @@ define float @nvvm_rcp(float %0) {
ret float %2
}
-; CHECK-LABEL: @llvm_nvvm_barrier0()
-define void @llvm_nvvm_barrier0() {
- ; CHECK: nvvm.barrier0
- call void @llvm.nvvm.barrier0()
- ret void
-}
-
AlexMaclean wrote:
I've added this
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/140615
>From babb28ef1c935f0d0cfb3b40f62be860be027010 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 15 May 2025 18:12:11 +0000
Subject: [PATCH 1/3] [NVPTX] Unify and extend barrier{.cta} intrinsic support
@@ -1349,6 +1349,10 @@ static bool upgradeIntrinsicFunction1(Function *F,
Function *&NewFn,
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
Name == "swap.lo.hi.b64")
Expand = true;
+ else if (Name == "barrier0" || Name == "b
@@ -240,6 +240,34 @@ def BF16RT : RegTyInfo;
def F16X2RT : RegTyInfo;
def BF16X2RT : RegTyInfo;
+// This class provides a basic wrapper around an NVPTXInst that abstracts the
+// specific syntax of most PTX instructions. It automatically handles the
+// construction of the
@@ -6,13 +7,15 @@
; Use bar.sync to arrive at a pre-computed barrier number and
; wait for all threads in CTA to also arrive:
define ptx_device void @test_barrier_named_cta() {
-; CHECK: mov.b32 %r[[REG0:[0-9]+]], 0;
-; CHECK: bar.sync %r[[REG0]];
-; CHECK: mov.b32 %r[[REG1:[
https://github.com/AlexMaclean updated
https://github.com/llvm/llvm-project/pull/140615
>From babb28ef1c935f0d0cfb3b40f62be860be027010 Mon Sep 17 00:00:00 2001
From: Alex Maclean
Date: Thu, 15 May 2025 18:12:11 +0000
Subject: [PATCH 1/4] [NVPTX] Unify and extend barrier{.cta} intrinsic support
101 - 156 of 156 matches
Mail list logo