hliao created this revision.
hliao added a reviewer: tra.
Herald added subscribers: cfe-commits, hiraditya, mgorny, jholewinski.
Herald added a project: clang.
- Replace `nvvm.texsurf.handle` intrinsics with their internal version, i.e.
`nvvm.texsurf.handle.internal`, just before the instruction selector.
- Teach clang codegen to generate `nvvm.texsurf.handle` instead of
`nvvm.texsurf.handle.internal`.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D77777
Files:
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/surface.cu
clang/test/CodeGenCUDA/texture.cu
llvm/lib/Target/NVPTX/CMakeLists.txt
llvm/lib/Target/NVPTX/NVPTX.h
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp
llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
Index: llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -6,6 +6,7 @@
declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.p1i64(metadata, i64 addrspace(1)*)
; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
@@ -28,7 +29,7 @@
; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
define void @bar(float* %red, i32 %idx) {
-; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
+; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
@@ -40,7 +41,24 @@
ret void
}
-!nvvm.annotations = !{!1, !2, !3}
+; SM20-LABEL: .entry bax
+; SM30-LABEL: .entry bax
+define void @bax(float* %red, i32 %idx) {
+; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
+ %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata !5, i64 addrspace(1)* @tex0)
+; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
+; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
+ %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
+ %ret = extractvalue { float, float, float, float } %val, 0
+; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
+; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
+ store float %ret, float* %red
+ ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3, !4}
!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
+!3 = !{void (float*, i32)* @bax, !"kernel", i32 1}
+!4 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
+!5 = !{i64 addrspace(1)* @tex0}
Index: llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXTexSurfHandleInternalizer.cpp
@@ -0,0 +1,81 @@
+//===- NVPTXTexSurfHandleInternalizer.cpp - --------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Replace `nvvm.texsurf.handle` intrinsics with their internal version, i.e.
+/// `nvvm.texsurf.handle.internal`.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-texsurf-handle-internalizer"
+
+namespace llvm {
+void initializeTexSurfHandleInternalizerPass(PassRegistry &);
+}
+
+namespace {
+
+class TexSurfHandleInternalizer : public FunctionPass {
+public:
+ static char ID;
+
+ TexSurfHandleInternalizer() : FunctionPass(ID) {
+ initializeTexSurfHandleInternalizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Internalize `nvvm.texsurf.handle` intrinsics";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+
+ bool runOnFunction(Function &F) override {
+ bool Changed = false;
+ for (auto &BB : F)
+ for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*BI++);
+ if (!II || II->getIntrinsicID() != Intrinsic::nvvm_texsurf_handle)
+ continue;
+ assert(II->getArgOperand(1) ==
+ cast<ValueAsMetadata>(
+ cast<MetadataAsValue>(II->getArgOperand(0))->getMetadata())
+ ->getValue());
+ // Replace it with the internal version.
+ IRBuilder<> Builder(II);
+ auto *NewII = Builder.CreateUnaryIntrinsic(
+ Intrinsic::nvvm_texsurf_handle_internal, II->getArgOperand(1));
+ II->replaceAllUsesWith(NewII);
+ II->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+ }
+};
+
+} // end of anonymous namespace
+
+FunctionPass *llvm::createNVPTXTexSurfHandleInternalizerPass() {
+ return new TexSurfHandleInternalizer();
+}
+
+char TexSurfHandleInternalizer::ID = 0;
+
+INITIALIZE_PASS(TexSurfHandleInternalizer, "nvptx-texsurf-handle-internalizer",
+ "Internalize texsurf-handle intrinsic", false, false)
Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -161,6 +161,7 @@
}
void addIRPasses() override;
+ bool addPreISel() override;
bool addInstSelector() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -300,6 +301,11 @@
}
}
+bool NVPTXPassConfig::addPreISel() {
+ addPass(createNVPTXTexSurfHandleInternalizerPass());
+ return false;
+}
+
bool NVPTXPassConfig::addInstSelector() {
const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
Index: llvm/lib/Target/NVPTX/NVPTX.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTX.h
+++ llvm/lib/Target/NVPTX/NVPTX.h
@@ -47,6 +47,7 @@
FunctionPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
+FunctionPass *createNVPTXTexSurfHandleInternalizerPass();
namespace NVPTX {
enum DrvInterface {
Index: llvm/lib/Target/NVPTX/CMakeLists.txt
===================================================================
--- llvm/lib/Target/NVPTX/CMakeLists.txt
+++ llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -19,20 +19,21 @@
NVPTXImageOptimizer.cpp
NVPTXInstrInfo.cpp
NVPTXLowerAggrCopies.cpp
- NVPTXLowerArgs.cpp
NVPTXLowerAlloca.cpp
- NVPTXPeephole.cpp
+ NVPTXLowerArgs.cpp
NVPTXMCExpr.cpp
+ NVPTXPeephole.cpp
NVPTXPrologEpilogPass.cpp
+ NVPTXProxyRegErasure.cpp
NVPTXRegisterInfo.cpp
NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp
+ NVPTXTexSurfHandleInternalizer.cpp
NVPTXUtilities.cpp
NVVMIntrRange.cpp
NVVMReflect.cpp
- NVPTXProxyRegErasure.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
Index: clang/test/CodeGenCUDA/texture.cu
===================================================================
--- clang/test/CodeGenCUDA/texture.cu
+++ clang/test/CodeGenCUDA/texture.cu
@@ -37,9 +37,9 @@
__attribute__((device)) v4f tex2d_ld(texture<float, 2, NormalizedFloat>, int, int) asm("llvm.nvvm.tex.unified.2d.v4f32.s32");
// DEVICE-LABEL: float @_Z3fooff(float %x, float %y)
-// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex)
+// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[TEX:.*]], [[TEX]])
// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %{{.*}}, float %{{.*}}, float %{{.*}})
-// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @norm)
+// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[NORM:.*]], [[NORM]])
// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
__attribute__((device)) float foo(float x, float y) {
return tex2d_ld(tex, x, y).x + tex2d_ld(norm, int(x), int(y)).x;
Index: clang/test/CodeGenCUDA/surface.cu
===================================================================
--- clang/test/CodeGenCUDA/surface.cu
+++ clang/test/CodeGenCUDA/surface.cu
@@ -28,7 +28,7 @@
__attribute__((device)) int suld_2d_zero(surface<void, 2>, int, int) asm("llvm.nvvm.suld.2d.i32.zero");
// DEVICE-LABEL: i32 @_Z3fooii(i32 %x, i32 %y)
-// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf)
+// DEVICE: call i64 @llvm.nvvm.texsurf.handle.p1i64(metadata [[SURF:.*]], [[SURF]])
// DEVICE: call i32 @llvm.nvvm.suld.2d.i32.zero(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
__attribute__((device)) int foo(int x, int y) {
return suld_2d_zero(surf, x, y);
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -6482,12 +6482,14 @@
if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
+ llvm::Value *MD = llvm::MetadataAsValue::get(
+ CGF.getLLVMContext(), llvm::ConstantAsMetadata::get(GV));
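- // Load the handle from the specific global variable using
- // `nvvm.texsurf.handle.internal` intrinsic.
+ // Load the handle from the specific global variable using
+ // `nvvm.texsurf.handle` intrinsic.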
Handle = CGF.EmitRuntimeCall(
- CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle,
{GV->getType()}),
- {GV}, "texsurf_handle");
+ {MD, GV}, "texsurf_handle");
} else
Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
CGF.EmitStoreOfScalar(Handle, Dst);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits