sameerds updated this revision to Diff 406889.
sameerds added a comment.

added tests for i128 load. hostcall position is now independent of subtarget.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119216/new/

https://reviews.llvm.org/D119216

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
  clang/test/CodeGenCUDA/amdgpu-asan.cu
  llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
  llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
  llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
  llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
  llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
  llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
  llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
  llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
  llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
  llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
  llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
  llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
  llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
  mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp

Index: mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
===================================================================
--- mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -308,11 +308,6 @@
     }
   }
 
-  // Set amdgpu_hostcall if host calls have been linked, as needed by newer LLVM
-  // FIXME: Is there a way to set this during printf() lowering that makes sense
-  if (ret->getFunction("__ockl_hostcall_internal"))
-    if (!ret->getModuleFlag("amdgpu_hostcall"))
-      ret->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
   return ret;
 }
 
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@
 
 attributes #0 = { "uniform-work-group-size"="false" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@
 attributes #0 = { nounwind readnone }
 attributes #1 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@
 
 attributes #1 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@
 
 attributes #2 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -97,6 +97,6 @@
 
 attributes #0 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@
 
 attributes #0 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -73,6 +73,6 @@
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@
 attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
 attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
@@ -0,0 +1,229 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
+declare void @function1()
+
+declare void @function2() #0
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+
+; CHECK: amdhsa.kernels:
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel10
+define amdgpu_kernel void @test_kernel10(i8* %a) {
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Call to an extern function
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel20
+define amdgpu_kernel void @test_kernel20(i8* %a) {
+  call void @function1()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Explicit attribute on kernel
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel21
+define amdgpu_kernel void @test_kernel21(i8* %a) #0 {
+  call void @function1()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Explicit attribute on extern callee
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel22
+define amdgpu_kernel void @test_kernel22(i8* %a) {
+  call void @function2()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Access more bytes than the pointer size
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel30
+define amdgpu_kernel void @test_kernel30(i128* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
+  %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
+  %x = load i128, i128 addrspace(4)* %cast
+  store i128 %x, i128* %a
+  ret void
+}
+
+; Typical load of hostcall buffer pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel40
+define amdgpu_kernel void @test_kernel40(i64* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Typical usage, overriden by explicit attribute on kernel
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel41
+define amdgpu_kernel void @test_kernel41(i64* %a) #0 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Access to implicit arg before the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel42
+define amdgpu_kernel void @test_kernel42(i64* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 72
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Access to implicit arg after the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel43
+define amdgpu_kernel void @test_kernel43(i64* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Accessing a byte just before the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel44
+define amdgpu_kernel void @test_kernel44(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 79
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel45
+define amdgpu_kernel void @test_kernel45(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 80
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel46
+define amdgpu_kernel void @test_kernel46(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 87
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte just after the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel47
+define amdgpu_kernel void @test_kernel47(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 88
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Access with an unknown offset
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel50
+define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Multiple geps reaching the hostcall pointer argument.
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel51
+define amdgpu_kernel void @test_kernel51(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 64
+  %x = load i8, i8 addrspace(4)* %gep2, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Multiple geps not reaching the hostcall pointer argument.
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel52
+define amdgpu_kernel void @test_kernel52(i8* %a) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
+  %x = load i8, i8 addrspace(4)* %gep2, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Access that does not match a known pattern.
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel60
+define amdgpu_kernel void @test_kernel60(i64* %a, i32 %b) {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
+  %x = ptrtoint i8 addrspace(4)* %gep to i64
+  store i64 %x, i64* %a, align 4
+  ret void
+}
+
+attributes #0 = { "amdgpu-no-hostcall-ptr" }
+attributes #1 = { nounwind readnone speculatable willreturn }
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
@@ -0,0 +1,231 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=CHECK %s
+declare void @function1()
+
+declare void @function2() #0
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #1
+
+; CHECK: amdhsa.kernels:
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel10
+define amdgpu_kernel void @test_kernel10(i8* %a) #2 {
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Call to an extern function
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel20
+define amdgpu_kernel void @test_kernel20(i8* %a) #2 {
+  call void @function1()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Explicit attribute on kernel
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel21
+define amdgpu_kernel void @test_kernel21(i8* %a) #3 {
+  call void @function1()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Explicit attribute on extern callee
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel22
+define amdgpu_kernel void @test_kernel22(i8* %a) #2 {
+  call void @function2()
+  store i8 3, i8* %a, align 1
+  ret void
+}
+
+; Access more bytes than the pointer size
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel30
+define amdgpu_kernel void @test_kernel30(i128* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %cast = bitcast i8 addrspace(4)* %gep to i128 addrspace(4)*
+  %x = load i128, i128 addrspace(4)* %cast
+  store i128 %x, i128* %a
+  ret void
+}
+
+; Typical load of hostcall buffer pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel40
+define amdgpu_kernel void @test_kernel40(i64* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Typical usage, overriden by explicit attribute on kernel
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel41
+define amdgpu_kernel void @test_kernel41(i64* %a) #3 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Access to implicit arg before the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel42
+define amdgpu_kernel void @test_kernel42(i64* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Access to implicit arg after the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel43
+define amdgpu_kernel void @test_kernel43(i64* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
+  %cast = bitcast i8 addrspace(4)* %gep to i64 addrspace(4)*
+  %x = load i64, i64 addrspace(4)* %cast
+  store i64 %x, i64* %a
+  ret void
+}
+
+; Accessing a byte just before the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel44
+define amdgpu_kernel void @test_kernel44(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 23
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel45
+define amdgpu_kernel void @test_kernel45(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 24
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte inside the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel46
+define amdgpu_kernel void @test_kernel46(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 31
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Accessing a byte just after the hostcall pointer
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel47
+define amdgpu_kernel void @test_kernel47(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 32
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Access with an unknown offset
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel50
+define amdgpu_kernel void @test_kernel50(i8* %a, i32 %b) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 %b
+  %x = load i8, i8 addrspace(4)* %gep, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Multiple geps reaching the hostcall pointer argument.
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel51
+define amdgpu_kernel void @test_kernel51(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 8
+  %x = load i8, i8 addrspace(4)* %gep2, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Multiple geps not reaching the hostcall pointer argument.
+
+; CHECK:  - .args:
+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel52
+define amdgpu_kernel void @test_kernel52(i8* %a) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep1 = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i64 16
+  %gep2 = getelementptr inbounds i8, i8 addrspace(4)* %gep1, i64 16
+  %x = load i8, i8 addrspace(4)* %gep2, align 1
+  store i8 %x, i8* %a, align 1
+  ret void
+}
+
+; Access that does not match a known pattern.
+
+; CHECK:  - .args:
+; CHECK: hidden_hostcall_buffer
+; CHECK-LABEL:    .name:           test_kernel60
+define amdgpu_kernel void @test_kernel60(i64* %a, i32 %b) #2 {
+  %ptr = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %gep = getelementptr inbounds i8, i8 addrspace(4)* %ptr, i32 42
+  %x = ptrtoint i8 addrspace(4)* %gep to i64
+  store i64 %x, i64* %a, align 4
+  ret void
+}
+
+attributes #0 = { "amdgpu-no-hostcall-ptr" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" }
+attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK:  Version: [ 1, 0 ]
-; CHECK:  Kernels:
-
-; CHECK:      - Name:            test_kernel
-; CHECK-NEXT:   SymbolName:      'test_kernel@kd'
-; CHECK-NEXT:   Language:        OpenCL C
-; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
-; CHECK-NEXT:   Args:
-; CHECK-NEXT:     - Name:          a
-; CHECK-NEXT:       TypeName:      char
-; CHECK-NEXT:       Size:          1
-; CHECK-NEXT:       Align:         1
-; CHECK-NEXT:       ValueKind:     ByValue
-; CHECK-NEXT:       AccQual:       Default
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
-; CHECK-NEXT:     - Size:            8
-; CHECK-NEXT:       Align:           8
-; CHECK-NEXT:       ValueKind:       HiddenHostcallBuffer
-; CHECK-NEXT:       AddrSpaceQual:   Global
-; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
-; CHECK-NOT:        ValueKind:     HiddenCompletionAction
-
-declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK:              ---
-; CHECK:      amdhsa.kernels:
-; CHECK:        - .args:
-; CHECK-NEXT:       - .name:           a
-; CHECK-NEXT:         .offset:         0
-; CHECK-NEXT:         .size:           1
-; CHECK-NEXT:         .type_name:      char
-; CHECK-NEXT:         .value_kind:     by_value
-; CHECK-NEXT:       - .offset:         8
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
-; CHECK-NEXT:       - .offset:         16
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
-; CHECK-NEXT:       - .offset:         24
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
-; CHECK-NEXT:       - .address_space:  global
-; CHECK-NEXT:         .offset:         32
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
-; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version:
-; CHECK-NEXT:       - 2
-; CHECK-NEXT:       - 0
-; CHECK:          .name:           test_kernel
-; CHECK:          .symbol:         test_kernel.kd
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK:  amdhsa.version:
-; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
@@ -48,7 +48,4 @@
 !opencl.ocl.version = !{!90}
 !90 = !{i32 2, i32 0}
 
-!llvm.module.flags = !{!0}
-!0 = !{i32 4, !"amdgpu_hostcall", i32 1}
-
 ; CHECK: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK: ---
-; CHECK:  Version: [ 1, 0 ]
-; CHECK:  Kernels:
-
-; CHECK:      - Name:            test_kernel
-; CHECK-NEXT:   SymbolName:      'test_kernel@kd'
-; CHECK-NEXT:   Language:        OpenCL C
-; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
-; CHECK-NEXT:   Args:
-; CHECK-NEXT:     - Name:          a
-; CHECK-NEXT:       TypeName:      char
-; CHECK-NEXT:       Size:          1
-; CHECK-NEXT:       Align:         1
-; CHECK-NEXT:       ValueKind:     ByValue
-; CHECK-NEXT:       AccQual:       Default
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
-; CHECK-NEXT:     - Size:          8
-; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
-; CHECK-NOT:        ValueKind:     HiddenHostcallBuffer
-; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
-; CHECK-NOT:        ValueKind:     HiddenCompletionAction
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK:              ---
-; CHECK:      amdhsa.kernels:
-; CHECK:        - .args:
-; CHECK-NEXT:       - .name:           a
-; CHECK-NEXT:         .offset:         0
-; CHECK-NEXT:         .size:           1
-; CHECK-NEXT:         .type_name:      char
-; CHECK-NEXT:         .value_kind:     by_value
-; CHECK-NEXT:       - .offset:         8
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
-; CHECK-NEXT:       - .offset:         16
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
-; CHECK-NEXT:       - .offset:         24
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
-
-; CHECK-NOT:          .value_kind:     hidden_hostcall_buffer
-
-; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version:
-; CHECK-NEXT:       - 2
-; CHECK-NEXT:       - 0
-; CHECK:          .name:           test_kernel
-; CHECK:          .symbol:         test_kernel.kd
-
-define amdgpu_kernel void @test_kernel(i8 %a) #0
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK:  amdhsa.version:
-; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
-
-attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
@@ -177,7 +177,7 @@
 ; CHECK-NEXT:       ValueKind:       HiddenGlobalOffsetZ
 ; CHECK-NEXT:     - Size:            8
 ; CHECK-NEXT:       Align:           8
-; CHECK-NEXT:       ValueKind:       HiddenNone
+; CHECK-NEXT:       ValueKind:       HiddenHostcallBuffer
 ; CHECK-NEXT:       AddrSpaceQual:   Global
 ; CHECK-NEXT:   CodeProps:
 define amdgpu_kernel void @test32(
@@ -221,7 +221,7 @@
 ; CHECK-NEXT:       ValueKind:       HiddenGlobalOffsetZ
 ; CHECK-NEXT:     - Size:            8
 ; CHECK-NEXT:       Align:           8
-; CHECK-NEXT:       ValueKind:       HiddenNone
+; CHECK-NEXT:       ValueKind:       HiddenHostcallBuffer
 ; CHECK-NEXT:       AddrSpaceQual:   Global
 ; CHECK-NEXT:     - Size:            8
 ; CHECK-NEXT:       Align:           8
@@ -273,7 +273,7 @@
 ; CHECK-NEXT:       ValueKind:       HiddenGlobalOffsetZ
 ; CHECK-NEXT:     - Size:            8
 ; CHECK-NEXT:       Align:           8
-; CHECK-NEXT:       ValueKind:       HiddenNone
+; CHECK-NEXT:       ValueKind:       HiddenHostcallBuffer
 ; CHECK-NEXT:       AddrSpaceQual:   Global
 ; CHECK-NEXT:     - Size:            8
 ; CHECK-NEXT:       Align:           8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v5.ll
@@ -112,10 +112,8 @@
   ret void
 }
 
-!llvm.module.flags = !{!0}
 !llvm.printf.fmts = !{!1, !2}
 
-!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
 !1 = !{!"1:1:4:%d\5Cn"}
 !2 = !{!"2:1:8:%g\5Cn"}
 
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
@@ -171,7 +171,7 @@
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         48
 ; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
 ; CHECK:          .name:           test32
 ; CHECK:          .symbol:         test32.kd
 define amdgpu_kernel void @test32(
@@ -214,7 +214,7 @@
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         48
 ; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         56
 ; CHECK-NEXT:         .size:           8
@@ -265,7 +265,7 @@
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         48
 ; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         56
 ; CHECK-NEXT:         .size:           8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel.ll
@@ -26,6 +26,9 @@
 ; CHECK-NEXT:     - Size:          8
 ; CHECK-NEXT:       Align:         8
 ; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenHostcallBuffer
 ; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
 ; CHECK-NOT:        ValueKind:     HiddenCompletionAction
 define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
@@ -56,7 +59,7 @@
 ; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
 ; CHECK-NEXT:     - Size:          8
 ; CHECK-NEXT:       Align:         8
-; CHECK-NEXT:       ValueKind:     HiddenNone
+; CHECK-NEXT:       ValueKind:     HiddenHostcallBuffer
 ; CHECK-NEXT:       AddrSpaceQual: Global
 ; CHECK-NEXT:     - Size:          8
 ; CHECK-NEXT:       Align:         8
Index: llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
+++ llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
@@ -50,7 +50,7 @@
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         32
 ; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .offset:         40
 ; CHECK-NEXT:         .size:           8
Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -42,6 +42,6 @@
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -418,13 +418,13 @@
 ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
 ;.
 ; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -647,15 +647,15 @@
 ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -837,7 +837,7 @@
   ret float %fadd
 }
 
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
 define amdgpu_kernel void @kern_sanitize_address() #4 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
 ; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
@@ -853,7 +853,7 @@
   ret void
 }
 
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
 define void @func_sanitize_address() #4 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
 ; AKF_HSA-SAME: () #[[ATTR5]] {
@@ -869,7 +869,7 @@
   ret void
 }
 
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
 define void @func_indirect_sanitize_address() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
 ; AKF_HSA-SAME: () #[[ATTR3]] {
@@ -885,7 +885,7 @@
   ret void
 }
 
-; Implicit arguments need to be enabled for sanitizers
+; Hostcall needs to be enabled for sanitizers
 define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
 ; AKF_HSA-SAME: () #[[ATTR4]] {
@@ -937,22 +937,22 @@
 ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
Index: llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -230,6 +230,6 @@
 ; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
===================================================================
--- llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -50,9 +50,6 @@
   auto Int64Ty = Builder.getInt64Ty();
   auto M = Builder.GetInsertBlock()->getModule();
   auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
-  if (!M->getModuleFlag("amdgpu_hostcall")) {
-    M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
-  }
   return Builder.CreateCall(Fn, Version);
 }
 
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -54,6 +54,9 @@
 /// false otherwise.
 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
 
+/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
+unsigned getHostcallImplicitArgPosition();
+
 struct GcnBufferFormatInfo {
   unsigned Format;
   unsigned BitsPerComp;
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -136,6 +136,22 @@
          isHsaAbiVersion5(STI);
 }
 
+// FIXME: All such magic numbers about the ABI should be in a
+// central TD file.
+unsigned getHostcallImplicitArgPosition() {
+  switch (AmdhsaCodeObjectVersion) {
+  case 2:
+  case 3:
+  case 4:
+    return 24;
+  case 5:
+    return 80;
+  default:
+    llvm_unreachable("Unexpected code object version");
+    return 0;
+  }
+}
+
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -421,6 +421,7 @@
   // Pointer to where the ABI inserts special kernel arguments separate from the
   // user arguments. This is an offset from the KernargSegmentPtr.
   bool ImplicitArgPtr : 1;
+  bool HostcallPtr : 1;
 
   // The hard-wired high half of the address of the global information table
   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
@@ -694,6 +695,10 @@
     return ImplicitArgPtr;
   }
 
+  bool hasHostcallPtr() const {
+    return HostcallPtr;
+  }
+
   bool hasImplicitBufferPtr() const {
     return ImplicitBufferPtr;
   }
Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -47,6 +47,7 @@
     WorkItemIDZ(false),
     ImplicitBufferPtr(false),
     ImplicitArgPtr(false),
+    HostcallPtr(false),
     GITPtrHigh(0xffffffff),
     HighBitsOf32BitAddress(0),
     GDSSize(0) {
@@ -134,6 +135,9 @@
 
     if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
       DispatchID = true;
+
+    if (!F.hasFnAttribute("amdgpu-no-hostcall-ptr"))
+      HostcallPtr = true;
   }
 
   // FIXME: This attribute is a hack, we just need an analysis on the function
Index: llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -405,7 +405,7 @@
   if (HiddenArgNumBytes >= 32) {
     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
-    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
       // The printf runtime binding pass should have ensured that hostcall and
       // printf are not used in the same module.
       assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
@@ -794,6 +794,7 @@
                                               msgpack::ArrayDocNode Args) {
   auto &Func = MF.getFunction();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
   if (!HiddenArgNumBytes)
@@ -822,7 +823,7 @@
     if (M->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
                     Args);
-    else if (M->getModuleFlag("amdgpu_hostcall")) {
+    else if (MFI.hasHostcallPtr()) {
       // The printf runtime binding pass should have ensured that hostcall and
       // printf are not used in the same module.
       assert(!M->getNamedMetadata("llvm.printf.fmts"));
@@ -973,6 +974,7 @@
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const Module *M = Func.getParent();
   auto &DL = M->getDataLayout();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
   auto Int64Ty = Type::getInt64Ty(Func.getContext());
   auto Int32Ty = Type::getInt32Ty(Func.getContext());
@@ -1011,7 +1013,7 @@
   } else
     Offset += 8; // Skipped.
 
-  if (M->getModuleFlag("amdgpu_hostcall")) {
+  if (MFI.hasHostcallPtr()) {
     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
                   Args);
   } else
@@ -1041,7 +1043,6 @@
   } else
     Offset += 8; // Skipped.
 
-  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   if (MFI.hasQueuePtr())
     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
 }
Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -12,6 +12,7 @@
 
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -102,7 +103,7 @@
 
 /// Returns true if the function requires the implicit argument be passed
 /// regardless of the function contents.
-static bool funcRequiresImplicitArgPtr(const Function &F) {
+static bool funcRequiresHostcallPtr(const Function &F) {
   // Sanitizers require the hostcall buffer passed in the implicit arguments.
   return F.hasFnAttribute(Attribute::SanitizeAddress) ||
          F.hasFnAttribute(Attribute::SanitizeThread) ||
@@ -341,12 +342,15 @@
 
     // If the function requires the implicit arg pointer due to sanitizers,
     // assume it's needed even if explicitly marked as not requiring it.
-    const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F);
-    if (NeedsImplicit)
+    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
+    if (NeedsHostcall) {
       removeAssumedBits(IMPLICIT_ARG_PTR);
+      removeAssumedBits(HOSTCALL_PTR);
+    }
 
     for (auto Attr : ImplicitAttrs) {
-      if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR)
+      if (NeedsHostcall &&
+          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
         continue;
 
       if (F->hasFnAttribute(Attr.second))
@@ -402,6 +406,10 @@
       removeAssumedBits(QUEUE_PTR);
     }
 
+    if (funcRetrievesHostcallPtr(A)) {
+      removeAssumedBits(HOSTCALL_PTR);
+    }
+
     return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                        : ChangeStatus::UNCHANGED;
   }
@@ -483,6 +491,82 @@
 
     return false;
   }
+
+  bool funcRetrievesHostcallPtr(Attributor &A) {
+    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+    const auto &DL = InfoCache.getDL();
+    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
+
+    // Check if this is a call to the implicitarg_ptr builtin and it
+    // is used to retrieve the hostcall pointer. The implicit arg for
+    // hostcall is not used only if every use of the implicitarg_ptr
+    // is a load that clearly does not retrieve any byte of the
+    // hostcall pointer. We check this by tracing all the uses of the
+    // initial call to the implicitarg_ptr intrinsic.
+    auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
+      auto &Call = cast<CallBase>(I);
+      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
+        return true;
+
+      SmallVector<std::pair<const User *, unsigned>, 16> WorkList;
+      SmallPtrSet<const User *, 16> Visited;
+
+      for (const auto *U : Call.users()) {
+        WorkList.push_back(std::make_pair(U, 0));
+        Visited.insert(U);
+      }
+
+      while (!WorkList.empty()) {
+        auto UseInfo = WorkList.back();
+        WorkList.pop_back();
+        const auto *V = UseInfo.first;
+        auto AccumulatedOffset = UseInfo.second;
+
+        if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+          // Recursively look through the offsets computed by any
+          // chain of GEPs. If the offset is not constant,
+          // conservatively assume that the implictarg_ptr may be
+          // indexed to retrieve the hostcall pointer.
+          APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+          if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+            return false;
+
+          AccumulatedOffset += GEPOffset.getZExtValue();
+        } else if (const auto *Load = dyn_cast<LoadInst>(V)) {
+          // A range check to see if the load retrieves any byte of the
+          // hostcall pointer from implicitarg_ptr.
+          unsigned Size = DL.getTypeStoreSize(Load->getType());
+          if (AccumulatedOffset < (Pos + 8) && (AccumulatedOffset + Size) > Pos)
+            return false;
+          continue; // don't enqueue users
+        } else if (const auto *Cast = dyn_cast<CastInst>(V)) {
+          // Look through any cast to a pointer type. The actual
+          // access will most likely cast the original i8* to an i64*,
+          // but we can be more permissive than that, since we will
+          // check the range of bytes anyway.
+          if (!Cast->getType()->isPointerTy())
+            return false;
+        } else {
+          return false;
+        }
+
+        for (const auto *U : V->users()) {
+          if (U->isDroppable())
+            continue;
+          if (Visited.insert(U).second)
+            WorkList.push_back(std::make_pair(U, AccumulatedOffset));
+        }
+      }
+
+      // All the users were explained away, so we know that the
+      // hostcall pointer was not accessed via this implicitarg_ptr.
+      return true;
+    };
+
+    bool UsedAssumedInformation = false;
+    return !A.checkForAllCallLikeInstructions(DoesNotLeadToHostcallPtr, *this,
+                                              UsedAssumedInformation);
+  }
 };
 
 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
Index: llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -18,6 +18,7 @@
 AMDGPU_ATTRIBUTE(QUEUE_PTR, "amdgpu-no-queue-ptr")
 AMDGPU_ATTRIBUTE(DISPATCH_ID, "amdgpu-no-dispatch-id")
 AMDGPU_ATTRIBUTE(IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr")
+AMDGPU_ATTRIBUTE(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")
 AMDGPU_ATTRIBUTE(WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x")
 AMDGPU_ATTRIBUTE(WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y")
 AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
Index: clang/test/CodeGenCUDA/amdgpu-asan.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-asan.cu
+++ clang/test/CodeGenCUDA/amdgpu-asan.cu
@@ -9,12 +9,12 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
 // RUN:   -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN,MFCHECK %s
+// RUN:   | FileCheck -check-prefixes=ASAN %s
 
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
 // RUN:   -O3 -mlink-bitcode-file %t.asanrtl.bc -x hip \
-// RUN:   | FileCheck -check-prefixes=ASAN,MFCHECK %s
+// RUN:   | FileCheck -check-prefixes=ASAN %s
 
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
 // RUN:   -fcuda-is-device -target-cpu gfx906 -x hip \
@@ -27,8 +27,5 @@
 // ASAN-DAG: @llvm.compiler.used = {{.*}}@__amdgpu_device_library_preserve_asan_functions_ptr
 // ASAN-DAG: define weak void @__asan_report_load1(i64 %{{.*}})
 
-// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
-// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
-
 // CHECK-NOT: @__amdgpu_device_library_preserve_asan_functions
 // CHECK-NOT: @__asan_report_load1
Index: clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-asan-printf.cu
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx906 -fsanitize=address \
-// RUN:   -O3 -x hip | FileCheck -check-prefixes=MFCHECK %s
-
-// MFCHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
-// MFCHECK: ![[FLAG1]] = !{i32 4, !"amdgpu_hostcall", i32 1}
-
-// Test to check hostcall module flag metadata is generated correctly
-// when a program has printf call and compiled with -fsanitize=address.
-#include "Inputs/cuda.h"
-__device__ void non_kernel() {
-  printf("sanitized device function");
-}
-
-__global__ void kernel() {
-  non_kernel();
-}
-
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -565,9 +565,6 @@
         "__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
         llvm::GlobalVariable::NotThreadLocal);
     addCompilerUsedGlobal(Var);
-    if (!getModule().getModuleFlag("amdgpu_hostcall")) {
-      getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
-    }
   }
 
   emitLLVMUsed();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to