sameerds updated this revision to Diff 406889.
sameerds added a comment.
added tests for i128 load. hostcall position is now independent of subtarget.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D119216/new/
https://reviews.llvm.org/D119216
Files:
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
clang/test/CodeGenCUDA/amdgpu-asan.cu
llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
Index: mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
===================================================================
--- mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -308,11 +308,6 @@
}
}
- // Set amdgpu_hostcall if host calls have been linked, as needed by newer LLVM
- // FIXME: Is there a way to set this during printf() lowering that makes sense
- if (ret->getFunction("__ockl_hostcall_internal"))
- if (!ret->getModuleFlag("amdgpu_hostcall"))
- ret->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
return ret;
}
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@
attributes #0 = { "uniform-work-group-size"="false" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -97,6 +97,6 @@
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -73,6 +73,6 @@
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
@@ -0,0 +1,229 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
+declare void @function1()
+
+declare void @function2() #0
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+
+; CHECK: amdhsa.kernels:
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel10
+define amdgpu_kernel void @test_kernel10(i8* %a) {
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Call to an extern function
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel20
+define amdgpu_kernel void @test_kernel20(i8* %a) {
+ call void @function1()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Explicit attribute on kernel
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel21
+define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
+ call void @function1()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Explicit attribute on extern callee
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel22
+define amdgpu_kernel void @test_kernel22(i8* %a) {
+ call void @function2()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Access more bytes than the pointer size
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel30
+define amdgpu_kernel void @test_kernel30(i128* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
+ %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
+ %x = load i128, i128 addrspace(4)* %cast
+ store i128 %x, i128* %a
+ ret void
+}
+
+; Typical load of hostcall buffer pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel40
+define amdgpu_kernel void @test_kernel40(i64* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Typical usage, overriden by explicit attribute on kernel
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel41
+define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Access to implicit arg before the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel42
+define amdgpu_kernel void @test_kernel42(i64* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Access to implicit arg after the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel43
+define amdgpu_kernel void @test_kernel43(i64* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Accessing a byte just before the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel44
+define amdgpu_kernel void @test_kernel44(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 79
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel45
+define amdgpu_kernel void @test_kernel45(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel46
+define amdgpu_kernel void @test_kernel46(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 87
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte just after the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel47
+define amdgpu_kernel void @test_kernel47(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Access with an unknown offset
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel50
+define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Multiple geps reaching the hostcall pointer argument.
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel51
+define amdgpu_kernel void @test_kernel51(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 64
+ %x = load i8, i8 addrspace(4)* %gep2, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Multiple geps not reaching the hostcall pointer argument.
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel52
+define amdgpu_kernel void @test_kernel52(i8* %a) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
+ %x = load i8, i8 addrspace(4)* %gep2, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Access that does not match a known pattern.
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel60
+define amdgpu_kernel void @test_kernel60(i64* %a, i32 %b) {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
+ %x = ptrtoint i8 addrspace(4)* %gep to i64
+ store i64 %x, i64* %a, align 4
+ ret void
+}
+
+attributes #0 = { "amdgpu-no-hostcall-ptr" }
+attributes #1 = { nounwind readnone speculatable willreturn }
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
@@ -0,0 +1,231 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
+declare void @function1()
+
+declare void @function2() #0
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+
+; CHECK: amdhsa.kernels:
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel10
+define amdgpu_kernel void @test_kernel10(i8* %a) #2 {
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Call to an extern function
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel20
+define amdgpu_kernel void @test_kernel20(i8* %a) #2 {
+ call void @function1()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Explicit attribute on kernel
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel21
+define amdgpu_kernel void @test_kernel21(i8* %a) #3 {
+ call void @function1()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Explicit attribute on extern callee
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel22
+define amdgpu_kernel void @test_kernel22(i8* %a) #2 {
+ call void @function2()
+ store i8 3, i8* %a, align 1
+ ret void
+}
+
+; Access more bytes than the pointer size
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel30
+define amdgpu_kernel void @test_kernel30(i128* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
+ %x = load i128, i128 addrspace(4)* %cast
+ store i128 %x, i128* %a
+ ret void
+}
+
+; Typical load of hostcall buffer pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel40
+define amdgpu_kernel void @test_kernel40(i64* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Typical usage, overriden by explicit attribute on kernel
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel41
+define amdgpu_kernel void @test_kernel41(i64* %a) #3 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Access to implicit arg before the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel42
+define amdgpu_kernel void @test_kernel42(i64* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Access to implicit arg after the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel43
+define amdgpu_kernel void @test_kernel43(i64* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
+ %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+ %x = load i64, i64 addrspace(4)* %cast
+ store i64 %x, i64* %a
+ ret void
+}
+
+; Accessing a byte just before the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel44
+define amdgpu_kernel void @test_kernel44(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 23
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel45
+define amdgpu_kernel void @test_kernel45(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel46
+define amdgpu_kernel void @test_kernel46(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 31
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Accessing a byte just after the hostcall pointer
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel47
+define amdgpu_kernel void @test_kernel47(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Access with an unknown offset
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel50
+define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
+ %x = load i8, i8 addrspace(4)* %gep, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Multiple geps reaching the hostcall pointer argument.
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel51
+define amdgpu_kernel void @test_kernel51(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 8
+ %x = load i8, i8 addrspace(4)* %gep2, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Multiple geps not reaching the hostcall pointer argument.
+
+; CHECK: - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel52
+define amdgpu_kernel void @test_kernel52(i8* %a) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+ %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
+ %x = load i8, i8 addrspace(4)* %gep2, align 1
+ store i8 %x, i8* %a, align 1
+ ret void
+}
+
+; Access that does not match a known pattern.
+
+; CHECK: - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL: .name: test_kernel60
+define amdgpu_kernel void @test_kernel60(i64* %a, i32 %b) #2 {
+ %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
+ %x = ptrtoint i8 addrspace(4)* %gep to i64
+ store i64 %x, i64* %a, align 4
+ ret void
+}
+
+attributes #0 = { "amdgpu-no-hostcall-ptr" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" }
+attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK: Version: [ 1, 0 ]
-; CHECK: Kernels:
-
-; CHECK: - Name: test_kernel
-; CHECK-NEXT: SymbolName: 'test_kernel@kd'
-; CHECK-NEXT: Language: OpenCL C
-; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
-; CHECK-NEXT: Args:
-; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: char
-; CHECK-NEXT: Size: 1
-; CHECK-NEXT: Align: 1
-; CHECK-NEXT: ValueKind: ByValue
-; CHECK-NEXT: AccQual: Default
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
-; CHECK-NEXT: AddrSpaceQual: Global
-; CHECK-NOT: ValueKind: HiddenDefaultQueue
-; CHECK-NOT: ValueKind: HiddenCompletionAction
-
-declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
- !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
- !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
- ret void
-}
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK: amdhsa.kernels:
-; CHECK: - .args:
-; CHECK-NEXT: - .name: a
-; CHECK-NEXT: .offset: 0
-; CHECK-NEXT: .size: 1
-; CHECK-NEXT: .type_name: char
-; CHECK-NEXT: .value_kind: by_value
-; CHECK-NEXT: - .offset: 8
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_x
-; CHECK-NEXT: - .offset: 16
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_y
-; CHECK-NEXT: - .offset: 24
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_z
-; CHECK-NEXT: - .address_space: global
-; CHECK-NEXT: .offset: 32
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
-; CHECK: .language: OpenCL C
-; CHECK-NEXT: .language_version:
-; CHECK-NEXT: - 2
-; CHECK-NEXT: - 0
-; CHECK: .name: test_kernel
-; CHECK: .symbol: test_kernel.kd
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
- !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
- !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
- ret void
-}
-
-; CHECK: amdhsa.version:
-; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
@@ -48,7 +48,4 @@
!opencl.ocl.version = !{!90}
!90 = !{i32 2, i32 0}
-!llvm.module.flags = !{!0}
-!0 = !{i32 4, !"amdgpu_hostcall", i32 1}
-
; CHECK: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK: Version: [ 1, 0 ]
-; CHECK: Kernels:
-
-; CHECK: - Name: test_kernel
-; CHECK-NEXT: SymbolName: 'test_kernel@kd'
-; CHECK-NEXT: Language: OpenCL C
-; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
-; CHECK-NEXT: Args:
-; CHECK-NEXT: - Name: a
-; CHECK-NEXT: TypeName: char
-; CHECK-NEXT: Size: 1
-; CHECK-NEXT: Align: 1
-; CHECK-NEXT: ValueKind: ByValue
-; CHECK-NEXT: AccQual: Default
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
-; CHECK-NEXT: - Size: 8
-; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
-; CHECK-NOT: ValueKind: HiddenHostcallBuffer
-; CHECK-NOT: ValueKind: HiddenDefaultQueue
-; CHECK-NOT: ValueKind: HiddenCompletionAction
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
- !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
- !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
- ret void
-}
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK: amdhsa.kernels:
-; CHECK: - .args:
-; CHECK-NEXT: - .name: a
-; CHECK-NEXT: .offset: 0
-; CHECK-NEXT: .size: 1
-; CHECK-NEXT: .type_name: char
-; CHECK-NEXT: .value_kind: by_value
-; CHECK-NEXT: - .offset: 8
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_x
-; CHECK-NEXT: - .offset: 16
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_y
-; CHECK-NEXT: - .offset: 24
-; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_global_offset_z
-
-; CHECK-NOT: .value_kind: hidden_hostcall_buffer
-
-; CHECK: .language: OpenCL C
-; CHECK-NEXT: .language_version:
-; CHECK-NEXT: - 2
-; CHECK-NEXT: - 0
-; CHECK: .name: test_kernel
-; CHECK: .symbol: test_kernel.kd
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
- !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
- !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
- ret void
-}
-
-; CHECK: amdhsa.version:
-; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
@@ -177,7 +177,7 @@
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: CodeProps:
define amdgpu_kernel void @test32(
@@ -221,7 +221,7 @@
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
@@ -273,7 +273,7 @@
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
@@ -112,10 +112,8 @@
ret void
}
-!llvm.module.flags = !{!0}
!llvm.printf.fmts = !{!1, !2}
-!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
!1 = !{!"1:1:4:%d\5Cn"}
!2 = !{!"2:1:8:%g\5Cn"}
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
@@ -171,7 +171,7 @@
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_none
+; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK: .name: test32
; CHECK: .symbol: test32.kd
define amdgpu_kernel void @test32(
@@ -214,7 +214,7 @@
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_none
+; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 56
; CHECK-NEXT: .size: 8
@@ -265,7 +265,7 @@
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 48
; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_none
+; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 56
; CHECK-NEXT: .size: 8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
@@ -26,6 +26,9 @@
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
@@ -56,7 +59,7 @@
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
-; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
; CHECK-NEXT: AddrSpaceQual: Global
; CHECK-NEXT: - Size: 8
; CHECK-NEXT: Align: 8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
@@ -50,7 +50,7 @@
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 32
; CHECK-NEXT: .size: 8
-; CHECK-NEXT: .value_kind: hidden_none
+; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
; CHECK-NEXT: - .address_space: global
; CHECK-NEXT: .offset: 40
; CHECK-NEXT: .size: 8
Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -42,6 +42,6 @@
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -418,13 +418,13 @@
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -647,15 +647,15 @@
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -837,7 +837,7 @@
ret float %fadd
}
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_sanitize_address() #4 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
@@ -853,7 +853,7 @@
ret void
}
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
define void @func_sanitize_address() #4 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
; AKF_HSA-SAME: () #[[ATTR5]] {
@@ -869,7 +869,7 @@
ret void
}
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
define void @func_indirect_sanitize_address() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR3]] {
@@ -885,7 +885,7 @@
ret void
}
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
; AKF_HSA-SAME: () #[[ATTR4]] {
@@ -937,22 +937,22 @@
; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
Index: llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -230,6 +230,6 @@
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===================================================================
--- llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -50,9 +50,6 @@
auto Int64Ty = Builder.getInt64Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
- if (!M->getModuleFlag("amdgpu_hostcall")) {
- M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
- }
return Builder.CreateCall(Fn, Version);
}
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -54,6 +54,9 @@
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
+/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
+unsigned getHostcallImplicitArgPosition();
+
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -136,6 +136,22 @@
isHsaAbiVersion5(STI);
}
+// FIXME: All such magic numbers about the ABI should be in a
+// central TD file.
+unsigned getHostcallImplicitArgPosition() {
+ switch (AmdhsaCodeObjectVersion) {
+ case 2:
+ case 3:
+ case 4:
+ return 24;
+ case 5:
+ return 80;
+ default:
+ llvm_unreachable("Unexpected code object version");
+ return 0;
+ }
+}
+
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -421,6 +421,7 @@
// Pointer to where the ABI inserts special kernel arguments separate from the
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
+ bool HostcallPtr : 1;
// The hard-wired high half of the address of the global information table
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
@@ -694,6 +695,10 @@
return ImplicitArgPtr;
}
+ bool hasHostcallPtr() const {
+ return HostcallPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -47,6 +47,7 @@
WorkItemIDZ(false),
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
+ HostcallPtr(false),
GITPtrHigh(0xffffffff),
HighBitsOf32BitAddress(0),
GDSSize(0) {
@@ -134,6 +135,9 @@
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
DispatchID = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr"))
+ HostcallPtr = true;
}
// FIXME: This attribute is a hack, we just need an analysis on the function
Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -405,7 +405,7 @@
if (HiddenArgNumBytes >= 32) {
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
- else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+ else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
@@ -794,6 +794,7 @@
msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
@@ -822,7 +823,7 @@
if (M->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
- else if (M->getModuleFlag("amdgpu_hostcall")) {
+ else if (MFI.hasHostcallPtr()) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
assert(!M->getNamedMetadata("llvm.printf.fmts"));
@@ -973,6 +974,7 @@
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Module *M = Func.getParent();
auto &DL = M->getDataLayout();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
auto Int32Ty = Type::getInt32Ty(Func.getContext());
@@ -1011,7 +1013,7 @@
} else
Offset += 8; // Skipped.
- if (M->getModuleFlag("amdgpu_hostcall")) {
+ if (MFI.hasHostcallPtr()) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
} else
@@ -1041,7 +1043,6 @@
} else
Offset += 8; // Skipped.
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasQueuePtr())
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
}
Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -12,6 +12,7 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -102,7 +103,7 @@
/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
-static bool funcRequiresImplicitArgPtr(const Function &F) {
+static bool funcRequiresHostcallPtr(const Function &F) {
// Sanitizers require the hostcall buffer passed in the implicit arguments.
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
@@ -341,12 +342,15 @@
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
- if (NeedsImplicit)
+ const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
+ if (NeedsHostcall) {
removeAssumedBits(IMPLICIT_ARG_PTR);
+ removeAssumedBits(HOSTCALL_PTR);
+ }
for (auto Attr : ImplicitAttrs) {
- if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
+ if (NeedsHostcall &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
continue;
if (F->hasFnAttribute(Attr.second))
@@ -402,6 +406,10 @@
removeAssumedBits(QUEUE_PTR);
}
+ if (funcRetrievesHostcallPtr(A)) {
+ removeAssumedBits(HOSTCALL_PTR);
+ }
+
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
@@ -483,6 +491,82 @@
return false;
}
+
+ bool funcRetrievesHostcallPtr(Attributor &A) {
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ const auto &DL = InfoCache.getDL();
+ auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
+
+ // Check if this is a call to the implicitarg_ptr builtin and it
+ // is used to retrieve the hostcall pointer. The implicit arg for
+ // hostcall is not used only if every use of the implicitarg_ptr
+ // is a load that clearly does not retrieve any byte of the
+ // hostcall pointer. We check this by tracing all the uses of the
+ // initial call to the implicitarg_ptr intrinsic.
+ auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
+ auto &Call = cast<CallBase>(I);
+ if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
+ return true;
+
+ SmallVector<std::pair<const User *, unsigned>, 16> WorkList;
+ SmallPtrSet<const User *, 16> Visited;
+
+ for (const auto *U : Call.users()) {
+ WorkList.push_back(std::make_pair(U, 0));
+ Visited.insert(U);
+ }
+
+ while (!WorkList.empty()) {
+ auto UseInfo = WorkList.back();
+ WorkList.pop_back();
+ const auto *V = UseInfo.first;
+ auto AccumulatedOffset = UseInfo.second;
+
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ // Recursively look through the offsets computed by any
+ // chain of GEPs. If the offset is not constant,
+ // conservatively assume that the implictarg_ptr may be
+ // indexed to retrieve the hostcall pointer.
+ APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ return false;
+
+ AccumulatedOffset += GEPOffset.getZExtValue();
+ } else if (const auto *Load = dyn_cast<LoadInst>(V)) {
+ // A range check to see if the load retrieves any byte of the
+ // hostcall pointer from implicitarg_ptr.
+ unsigned Size = DL.getTypeStoreSize(Load->getType());
+ if (AccumulatedOffset < (Pos + 8) && (AccumulatedOffset + Size) > Pos)
+ return false;
+ continue; // don't enqueue users
+ } else if (const auto *Cast = dyn_cast<CastInst>(V)) {
+ // Look through any cast to a pointer type. The actual
+ // access will most likely cast the original i8* to an i64*,
+ // but we can be more permissive than that, since we will
+ // check the range of bytes anyway.
+ if (!Cast->getType()->isPointerTy())
+ return false;
+ } else {
+ return false;
+ }
+
+ for (const auto *U : V->users()) {
+ if (U->isDroppable())
+ continue;
+ if (Visited.insert(U).second)
+ WorkList.push_back(std::make_pair(U, AccumulatedOffset));
+ }
+ }
+
+ // All the users were explained away, so we know that the
+ // hostcall pointer was not accessed via this implicitarg_ptr.
+ return true;
+ };
+
+ bool UsedAssumedInformation = false;
+ return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this,
+ UsedAssumedInformation);
+ }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
Index: llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -18,6 +18,7 @@
AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
+AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ clang/test/CodeGenCUDA/amdgpu-asan.cu
@@ -9,12 +9,12 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
+// RUN: | FileCheck -check-prefixes=ASAN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
// RUN: -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN: | FileCheck -check-prefixes=ASAN,MFCHECK %s
+// RUN: | FileCheck -check-prefixes=ASAN %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -target-cpu gfx906 -x hip \
@@ -27,8 +27,5 @@
// ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
// ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})
-// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
-// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
-
// CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
// CHECK-NOT: @__asan_report_load1
Index: clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN: -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN: -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
-
-// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
-// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
-
-// Test to check hostcall module flag metadata is generated correctly
-// when a program has printf call and compiled with -fsanitize=address.
-#include "Inputs/cuda.h"
-__device__ void non_kernel() {
- printf("sanitized device function");
-}
-
-__global__ void kernel() {
- non_kernel();
-}
-
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,9 +565,6 @@
"__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
llvm::GlobalVariable::NotThreadLocal);
addCompilerUsedGlobal(Var);
- if (!getModule().getModuleFlag("amdgpu_hostcall")) {
- getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
- }
}
emitLLVMUsed();
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits