scott.linder created this revision. Herald added subscribers: hsmhsm, foad, Naghasan, ldrumm, kerbowa, hiraditya, t-tye, Anastasia, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm. Herald added a project: All. scott.linder requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, wdng. Herald added projects: clang, LLVM.
The diagnostic is unreliable, and triggers even for dead uses of hostcall that may exist when linking the device-libs at lower optimization levels. Eliminate the diagnostic, and directly document the limitation for OpenCL before code object V5. Make some NFC changes to clarify the related code in the MetadataStreamer. Add a clang test to tie OCL sources containing printf to the backend IR tests for this situation. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D122669 Files: clang/test/CodeGenOpenCL/amdgpu-printf.cl llvm/docs/AMDGPUUsage.rst llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
Index: llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s - -@.str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1 - -define amdgpu_kernel void @test_kernel(i32 %n) { -entry: - %str = alloca [9 x i8], align 1 - %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0 - %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) - %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) - ret void -} - -declare i32 @printf(i8 addrspace(2)*, ...) - -declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) - -; CHECK: error: Cannot use both printf and hostcall in the same module Index: llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s + +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 + +define amdgpu_kernel void @test_kernel(i32 %n) { +entry: + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call1 = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) + %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + ret void +} + +declare i32 @printf(i8 addrspace(4)*, ...) + +declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) + +; CHECK-NOT: error: +; CHECK-NOT: warning: Index: llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -563,15 +563,6 @@ if (Printfs.empty()) return false; - if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { - for (auto &U : HostcallFunction->uses()) { - if (auto *CI = dyn_cast<CallInst>(U.getUser())) { - M.getContext().emitError( - CI, "Cannot use both printf and hostcall in the same module"); - } - } - } - TD = &M.getDataLayout(); return lowerPrintfForGpu(M); Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -401,17 +401,15 @@ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); } @@ -820,19 +818,17 @@ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, emit "hostcall buffer" - // if "hostcall" module flag is set, otherwise emit dummy "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (MFI.hasHostcallPtr()) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!M->getNamedMetadata("llvm.printf.fmts")); + else if (MFI.hasHostcallPtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); } Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -2827,12 +2827,16 @@ "HiddenPrintfBuffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenHostcallBuffer". "HiddenHostcallBuffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenPrintfBuffer". "HiddenDefaultQueue" A global address space pointer @@ -3352,12 +3356,18 @@ "hidden_printf_buffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_hostcall_buffer" + before Code Object V5. "hidden_hostcall_buffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_printf_buffer" + before Code Object V5. "hidden_default_queue" A global address space pointer Index: clang/test/CodeGenOpenCL/amdgpu-printf.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + +// CHECK-LABEL: @test_printf_noargs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([1 x i8], [1 x i8] addrspace(4)* @.str, i64 0, i64 0)) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_noargs() { + printf(""); +} + +// CHECK-LABEL: @test_printf_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @.str.1, i64 0, i64 0), i32 noundef [[TMP0]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_int(int i) { + printf("%d", i); +} + +// CHECK-LABEL: @test_printf_str_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 1 [[TMP1]], i8 addrspace(4)* align 1 getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @__const.test_printf_str_int.s, i32 0, i32 0), i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* [[S]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str.2, i64 0, i64 0), i8 addrspace(5)* noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* [[TMP3]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_str_int(int i) { + char s[] = "foo"; + printf("%s:%d", s, i); +}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits