scott.linder created this revision. Herald added subscribers: foad, kerbowa, hiraditya, t-tye, Anastasia, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl, arsenm. Herald added a project: All. scott.linder requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, wdng. Herald added projects: clang, LLVM.
We are currently overly conservative, and reject modules which contain dead uses of hostcall. This has the effect of preventing compilation at -O0 of a use of printf in an otherwise empty module (i.e. one containing no use of hostcall). In a non-single-source world it is not possible to determine whether the call is dead in the compiler, even if we wanted to. Instead, make the error a warning, as the compiler cannot diagnose it without some false positives, but it will remain an issue the user should be aware of when producing pre-V5 HSA ABI code objects. Expand testing to modules with printf+no-hostcall, printf+dead-hostcall, and printf+hostcall to exercise the warning. Add clang tests to tie the above tests to OpenCL source. Specifically, the test llvm/test/CodeGen/AMDGPU/opencl-printf-dead-hostcall.ll represents the case which previously failed at -O0. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D121951 Files: clang/test/CodeGenOpenCL/amdgpu-printf.cl llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll llvm/test/CodeGen/AMDGPU/opencl-printf-dead-hostcall.ll llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
Index: llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll +++ llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll @@ -1,18 +1,17 @@ -; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s -@.str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1 +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 define amdgpu_kernel void @test_kernel(i32 %n) { entry: - %str = alloca [9 x i8], align 1 - %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0 - %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) - %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) ret void } -declare i32 @printf(i8 addrspace(2)*, ...) +declare i32 @printf(i8 addrspace(4)*, ...) declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) -; CHECK: error: Cannot use both printf and hostcall in the same module +; CHECK-NOT: warning: Cannot use both printf and hostcall in the same module Index: llvm/test/CodeGen/AMDGPU/opencl-printf-dead-hostcall.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/opencl-printf-dead-hostcall.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s + +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 + +define amdgpu_kernel void @test_kernel(i32 %n) { +entry: + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) + ret void +} + +define amdgpu_kernel void @dead_function() { + %call = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + ret void +} + +declare i32 @printf(i8 addrspace(4)*, ...) + +declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) + +; CHECK: warning: Cannot use both printf and hostcall in the same module Index: llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s + +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 + +define amdgpu_kernel void @test_kernel(i32 %n) { +entry: + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call1 = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) + %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + ret void +} + +declare i32 @printf(i8 addrspace(4)*, ...) + +declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) + +; CHECK: warning: Cannot use both printf and hostcall in the same module Index: llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -563,11 +564,12 @@ if (Printfs.empty()) return false; - if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { + if (auto *HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { for (auto &U : HostcallFunction->uses()) { if (auto *CI = dyn_cast<CallInst>(U.getUser())) { - M.getContext().emitError( - CI, "Cannot use both printf and hostcall in the same module"); + M.getContext().diagnose(DiagnosticInfoInlineAsm( + *CI, "Cannot use both printf and hostcall in the same module", + DS_Warning)); } } } Index: clang/test/CodeGenOpenCL/amdgpu-printf.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + +// CHECK-LABEL: @test_printf_noargs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([1 x i8], [1 x i8] addrspace(4)* @.str, i64 0, i64 0)) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_noargs() { + printf(""); +} + +// CHECK-LABEL: @test_printf_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @.str.1, i64 0, i64 0), i32 noundef [[TMP0]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_int(int i) { + printf("%d", i); +} + +// CHECK-LABEL: @test_printf_str_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 1 [[TMP1]], i8 addrspace(4)* align 1 getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @__const.test_printf_str_int.s, i32 0, i32 0), i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* [[S]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str.2, i64 0, i64 0), i8 addrspace(5)* noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* [[TMP3]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_str_int(int i) { + char s[] = "foo"; + printf("%s:%d", s, i); +}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits