It seems like the real problem here is that we're giving the template
instantiation the wrong linkage. It can be used from outside this llvm
module, so it should be weak_odr instead of linkonce_odr.
On Sep 22, 2015 10:24 AM, "Artem Belevich via cfe-commits" <
cfe-commits@lists.llvm.org> wrote:

> Author: tra
> Date: Tue Sep 22 12:22:51 2015
> New Revision: 248293
>
> URL: http://llvm.org/viewvc/llvm-project?rev=248293&view=rev
> Log:
> [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
>
> This makes sure that we emit kernels that were instantiated from the
> host code and which would never be explicitly referenced by anything
> else on device side.
>
> Differential Revision: http://reviews.llvm.org/D11666
>
> Modified:
>     cfe/trunk/lib/Sema/SemaDeclAttr.cpp
>     cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
>
> Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=248293&r1=248292&r2=248293&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Sep 22 12:22:51 2015
> @@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, De
>    D->addAttr(::new (S.Context)
>                CUDAGlobalAttr(Attr.getRange(), S.Context,
>                               Attr.getAttributeSpellingListIndex()));
> +
> +  // Add implicit attribute((used)) so we don't eliminate kernels
> +  // because there is nothing referencing them on device side.
> +  D->addAttr(UsedAttr::CreateImplicit(S.Context));
>  }
>
>  static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList
> &Attr) {
>
> Modified: cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu?rev=248293&r1=248292&r2=248293&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu (original)
> +++ cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu Tue Sep 22 12:22:51 2015
> @@ -1,7 +1,16 @@
> +// Make sure that __global__ functions are emitted along with correct
> +// annotations and are added to @llvm.used to prevent their elimination.
> +// REQUIRES: nvptx-registered-target
> +//
>  // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device
> -emit-llvm -o - | FileCheck %s
>
>  #include "Inputs/cuda.h"
>
> +// Make sure that all __global__ functions are added to @llvm.used
> +// CHECK: @llvm.used = appending global
> +// CHECK-SAME: @global_function
> +// CHECK-SAME: @_Z16templated_kernelIiEvT_
> +
>  // CHECK-LABEL: define void @device_function
>  extern "C"
>  __device__ void device_function() {}
> @@ -13,4 +22,10 @@ __global__ void global_function() {
>    device_function();
>  }
>
> +// Make sure host-instantiated kernels are preserved on device side.
> +template <typename T> __global__ void templated_kernel(T param) {}
> +// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
> +void host_function() { templated_kernel<<<0,0>>>(0); }
> +
>  // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
> +// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_,
> !"kernel", i32 1}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits@lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to