This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG31d8dbd1e5b4: [CUDA/SPIR-V] Force passing aggregate type byval (authored by shangwuyao).
Changed prior to commit: https://reviews.llvm.org/D130387?vs=446934&id=446966#toc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D130387/new/ https://reviews.llvm.org/D130387 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDASPIRV/copy-aggregate-byval.cu Index: clang/test/CodeGenCUDASPIRV/copy-aggregate-byval.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDASPIRV/copy-aggregate-byval.cu @@ -0,0 +1,25 @@ +// Tests CUDA kernel arguments get copied by value when targeting SPIR-V, even with +// destructor, copy constructor or move constructor defined by user. + +// RUN: %clang -Xclang -no-opaque-pointers -emit-llvm --cuda-device-only --offload=spirv32 \ +// RUN: -nocudalib -nocudainc %s -o %t.bc -c 2>&1 +// RUN: llvm-dis %t.bc -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll + +// RUN: %clang -Xclang -no-opaque-pointers -emit-llvm --cuda-device-only --offload=spirv64 \ +// RUN: -nocudalib -nocudainc %s -o %t.bc -c 2>&1 +// RUN: llvm-dis %t.bc -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll + +class GpuData { + public: + __attribute__((host)) __attribute__((device)) GpuData(int* src) {} + __attribute__((host)) __attribute__((device)) ~GpuData() {} + __attribute__((host)) __attribute__((device)) GpuData(const GpuData& other) {} + __attribute__((host)) __attribute__((device)) GpuData(GpuData&& other) {} +}; + +// CHECK: define +// CHECK-SAME: spir_kernel void @_Z6kernel7GpuData(%class.GpuData* noundef byval(%class.GpuData) align + +__attribute__((global)) void kernel(GpuData output) {} Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -10449,6 +10449,15 @@ LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS); return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } + + // Force copying aggregate type in kernel 
arguments by value when + // compiling CUDA targeting SPIR-V. This is required for the object + // copied to be valid on the device. + // This behavior follows the CUDA spec + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, + // and matches the NVPTX implementation. + if (isAggregateTypeForABI(Ty)) + return getNaturalAlignIndirect(Ty, /* byval */ true); } return classifyArgumentType(Ty); }
Index: clang/test/CodeGenCUDASPIRV/copy-aggregate-byval.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDASPIRV/copy-aggregate-byval.cu @@ -0,0 +1,25 @@ +// Tests CUDA kernel arguments get copied by value when targeting SPIR-V, even with +// destructor, copy constructor or move constructor defined by user. + +// RUN: %clang -Xclang -no-opaque-pointers -emit-llvm --cuda-device-only --offload=spirv32 \ +// RUN: -nocudalib -nocudainc %s -o %t.bc -c 2>&1 +// RUN: llvm-dis %t.bc -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll + +// RUN: %clang -Xclang -no-opaque-pointers -emit-llvm --cuda-device-only --offload=spirv64 \ +// RUN: -nocudalib -nocudainc %s -o %t.bc -c 2>&1 +// RUN: llvm-dis %t.bc -o %t.ll +// RUN: FileCheck %s --input-file=%t.ll + +class GpuData { + public: + __attribute__((host)) __attribute__((device)) GpuData(int* src) {} + __attribute__((host)) __attribute__((device)) ~GpuData() {} + __attribute__((host)) __attribute__((device)) GpuData(const GpuData& other) {} + __attribute__((host)) __attribute__((device)) GpuData(GpuData&& other) {} +}; + +// CHECK: define +// CHECK-SAME: spir_kernel void @_Z6kernel7GpuData(%class.GpuData* noundef byval(%class.GpuData) align + +__attribute__((global)) void kernel(GpuData output) {} Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -10449,6 +10449,15 @@ LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS); return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } + + // Force copying aggregate type in kernel arguments by value when + // compiling CUDA targeting SPIR-V. This is required for the object + // copied to be valid on the device. 
+ // This behavior follows the CUDA spec + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, + // and matches the NVPTX implementation. + if (isAggregateTypeForABI(Ty)) + return getNaturalAlignIndirect(Ty, /* byval */ true); } return classifyArgumentType(Ty); }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits