https://github.com/AustinSchuh updated https://github.com/llvm/llvm-project/pull/134758
>From 1e6367407a4b23b2a85f088ba4b66a0c0afc8faa Mon Sep 17 00:00:00 2001 From: Austin Schuh <austin.li...@gmail.com> Date: Mon, 7 Apr 2025 17:18:38 -0700 Subject: [PATCH 1/2] cuda clang: Move nvptx-surface.cu test to CodeGenCUDA Signed-off-by: Austin Schuh <austin.li...@gmail.com> --- clang/test/CodeGen/Inputs/cuda.h | 194 ------------------ .../{CodeGen => CodeGenCUDA}/nvptx-surface.cu | 164 +++++++++++++++ 2 files changed, 164 insertions(+), 194 deletions(-) delete mode 100644 clang/test/CodeGen/Inputs/cuda.h rename clang/test/{CodeGen => CodeGenCUDA}/nvptx-surface.cu (98%) diff --git a/clang/test/CodeGen/Inputs/cuda.h b/clang/test/CodeGen/Inputs/cuda.h deleted file mode 100644 index 58202442e1f8c..0000000000000 --- a/clang/test/CodeGen/Inputs/cuda.h +++ /dev/null @@ -1,194 +0,0 @@ -/* Minimal declarations for CUDA support. Testing purposes only. - * This should stay in sync with clang/test/Headers/Inputs/include/cuda.h - */ -#pragma once - -// Make this file work with nvcc, for testing compatibility. - -#ifndef __NVCC__ -#define __constant__ __attribute__((constant)) -#define __device__ __attribute__((device)) -#define __global__ __attribute__((global)) -#define __host__ __attribute__((host)) -#define __shared__ __attribute__((shared)) -#define __managed__ __attribute__((managed)) -#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__))) - -struct dim3 { - unsigned x, y, z; - __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {} -}; - -// Host- and device-side placement new overloads. -void *operator new(__SIZE_TYPE__, void *p) { return p; } -void *operator new[](__SIZE_TYPE__, void *p) { return p; } -__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; } -__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; } - -#define CUDA_VERSION 10100 - -struct char1 { - char x; - __host__ __device__ char1(char x = 0) : x(x) {} -}; -struct char2 { - char x, y; - __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} -}; -struct char4 { - char x, y, z, w; - __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct uchar1 { - unsigned char x; - __host__ __device__ uchar1(unsigned char x = 0) : x(x) {} -}; -struct uchar2 { - unsigned char x, y; - __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} -}; -struct uchar4 { - unsigned char x, y, z, w; - __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct short1 { - short x; - __host__ __device__ short1(short x = 0) : x(x) {} -}; -struct short2 { - short x, y; - __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} -}; -struct short4 { - short x, y, z, w; - __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct ushort1 { - unsigned short x; - __host__ __device__ ushort1(unsigned short x = 0) : x(x) {} -}; -struct ushort2 { - unsigned short x, y; - __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} -}; -struct ushort4 { - unsigned short x, y, z, w; - __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct int1 { - int x; - __host__ __device__ int1(int x = 0) : x(x) {} -}; -struct int2 { - int x, y; - __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} -}; -struct int4 { - int x, y, z, w; - __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct uint1 { - unsigned x; - __host__ __device__ uint1(unsigned x = 0) : x(x) {} -}; -struct uint2 { - unsigned x, y; - __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} -}; -struct uint3 { - unsigned x, y, z; - __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} -}; -struct uint4 { - unsigned x, y, z, w; - __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct longlong1 { - long long x; - __host__ __device__ longlong1(long long x = 0) : x(x) {} -}; -struct longlong2 { - long long x, y; - __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} -}; -struct longlong4 { - long long x, y, z, w; - __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct ulonglong1 { - unsigned long long x; - __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {} -}; -struct ulonglong2 { - unsigned long long x, y; - __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} -}; -struct ulonglong4 { - unsigned long long x, y, z, w; - __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct float1 { - float x; - __host__ __device__ float1(float x = 0) : x(x) {} -}; -struct float2 { - float x, y; - __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} -}; -struct float4 { - float x, y, z, w; - __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct double1 { - double x; - __host__ __device__ double1(double x = 0) : x(x) {} -}; -struct double2 { - double x, y; - __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} -}; -struct double4 { - double x, y, z, w; - __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} -}; - -typedef unsigned long long cudaTextureObject_t; -typedef unsigned long long cudaSurfaceObject_t; - -enum cudaTextureReadMode { - cudaReadModeNormalizedFloat, - cudaReadModeElementType -}; - -enum cudaSurfaceBoundaryMode { - cudaBoundaryModeZero, - cudaBoundaryModeClamp, - cudaBoundaryModeTrap -}; - -enum { - cudaTextureType1D, - cudaTextureType2D, - cudaTextureType3D, - cudaTextureTypeCubemap, - cudaTextureType1DLayered, - cudaTextureType2DLayered, - cudaTextureTypeCubemapLayered -}; - -struct textureReference {}; -template <class T, int texType = cudaTextureType1D, - enum cudaTextureReadMode mode = cudaReadModeElementType> -struct __attribute__((device_builtin_texture_type)) texture - : public textureReference {}; - -#endif // !__NVCC__ diff --git a/clang/test/CodeGen/nvptx-surface.cu b/clang/test/CodeGenCUDA/nvptx-surface.cu similarity index 98% rename from clang/test/CodeGen/nvptx-surface.cu rename to clang/test/CodeGenCUDA/nvptx-surface.cu index 56995f2c0da80..f7822740a571c 100644 --- a/clang/test/CodeGen/nvptx-surface.cu +++ b/clang/test/CodeGenCUDA/nvptx-surface.cu @@ -2,6 +2,170 @@ // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s #include "Inputs/cuda.h" +struct char1 { + char x; + __host__ __device__ char1(char x = 0) : x(x) {} +}; +struct char2 { + char x, y; + __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} +}; +struct char4 { + char x, y, z, w; + __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uchar1 { + unsigned char x; + __host__ __device__ uchar1(unsigned char x = 0) : x(x) {} +}; +struct uchar2 { + unsigned char x, y; + __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} +}; +struct uchar4 { + unsigned char x, y, z, w; + __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct short1 { + short x; + __host__ __device__ short1(short x = 0) : x(x) {} +}; +struct short2 { + short x, y; + __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} +}; +struct short4 { + short x, y, z, w; + __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ushort1 { + unsigned short x; + __host__ __device__ ushort1(unsigned short x = 0) : x(x) {} +}; +struct ushort2 { + unsigned short x, y; + __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} +}; +struct ushort4 { + unsigned short x, y, z, w; + __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct int1 { + int x; + __host__ __device__ int1(int x = 0) : x(x) {} +}; +struct int2 { + int x, y; + __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} +}; +struct int4 { + int x, y, z, w; + __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uint1 { + unsigned x; + __host__ __device__ uint1(unsigned x = 0) : x(x) {} +}; +struct uint2 { + unsigned x, y; + __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} +}; +struct uint3 { + unsigned x, y, z; + __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} +}; +struct uint4 { + unsigned x, y, z, w; + __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct longlong1 { + long long x; + __host__ __device__ longlong1(long long x = 0) : x(x) {} +}; +struct longlong2 { + long long x, y; + __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} +}; +struct longlong4 { + long long x, y, z, w; + __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ulonglong1 { + unsigned long long x; + __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {} +}; +struct ulonglong2 { + unsigned long long x, y; + __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} +}; +struct ulonglong4 { + unsigned long long x, y, z, w; + __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct float1 { + float x; + __host__ __device__ float1(float x = 0) : x(x) {} +}; +struct float2 { + float x, y; + __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} +}; +struct float4 { + float x, y, z, w; + __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct double1 { + double x; + __host__ __device__ double1(double x = 0) : x(x) {} +}; +struct double2 { + double x, y; + __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} +}; +struct double4 { + double x, y, z, w; + __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} +}; + +typedef unsigned long long cudaTextureObject_t; +typedef unsigned long long cudaSurfaceObject_t; + +enum cudaTextureReadMode { + cudaReadModeNormalizedFloat, + cudaReadModeElementType +}; + +enum cudaSurfaceBoundaryMode { + cudaBoundaryModeZero, + cudaBoundaryModeClamp, + cudaBoundaryModeTrap +}; + +enum { + cudaTextureType1D, + cudaTextureType2D, + cudaTextureType3D, + cudaTextureTypeCubemap, + cudaTextureType1DLayered, + cudaTextureType2DLayered, + cudaTextureTypeCubemapLayered +}; + +struct textureReference {}; +template <class T, int texType = cudaTextureType1D, + enum cudaTextureReadMode mode = cudaReadModeElementType> +struct __attribute__((device_builtin_texture_type)) texture + : public textureReference {}; + #include "__clang_cuda_texture_intrinsics.h" __device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { >From 522cd517577a46e1d491cb7400169e34b92e4a00 Mon Sep 17 00:00:00 2001 From: Austin Schuh <austin.li...@gmail.com> Date: Tue, 8 Apr 2025 14:46:17 -0700 Subject: [PATCH 2/2] Move code to cuda.h, and fix issues that causes --- clang/test/CodeGenCUDA/Inputs/cuda.h | 169 ++++++++++++++++++ .../test/CodeGenCUDA/correctly-rounded-div.cu | 4 +- clang/test/CodeGenCUDA/nvptx-surface.cu | 164 ----------------- clang/test/CodeGenCUDA/offloading-entries.cu | 22 +-- .../test/CodeGenCUDA/propagate-attributes.cu | 5 +- 5 files changed, 180 insertions(+), 184 deletions(-) diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h index a8d85afb7cd21..dc85eae0c5178 100644 --- a/clang/test/CodeGenCUDA/Inputs/cuda.h +++ b/clang/test/CodeGenCUDA/Inputs/cuda.h @@ -72,3 +72,172 @@ extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim, #endif extern "C" __device__ int printf(const char*, ...); + +struct char1 { + char x; + __host__ __device__ char1(char x = 0) : x(x) {} +}; +struct char2 { + char x, y; + __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} +}; +struct char4 { + char x, y, z, w; + __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uchar1 { + unsigned char x; + __host__ __device__ uchar1(unsigned char x = 0) : x(x) {} +}; +struct uchar2 { + unsigned char x, y; + __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} +}; +struct uchar4 { + unsigned char x, y, z, w; + __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct short1 { + short x; + __host__ __device__ short1(short x = 0) : x(x) {} +}; +struct short2 { + short x, y; + __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} +}; +struct short4 { + short x, y, z, w; + __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ushort1 { + unsigned short x; + __host__ __device__ ushort1(unsigned short x = 0) : x(x) {} +}; +struct ushort2 { + unsigned short x, y; + __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} +}; +struct ushort4 { + unsigned short x, y, z, w; + __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct int1 { + int x; + __host__ __device__ int1(int x = 0) : x(x) {} +}; +struct int2 { + int x, y; + __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} +}; +struct int4 { + int x, y, z, w; + __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct uint1 { + unsigned x; + __host__ __device__ uint1(unsigned x = 0) : x(x) {} +}; +struct uint2 { + unsigned x, y; + __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} +}; +struct uint3 { + unsigned x, y, z; + __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} +}; +struct uint4 { + unsigned x, y, z, w; + __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct longlong1 { + long long x; + __host__ __device__ longlong1(long long x = 0) : x(x) {} +}; +struct longlong2 { + long long x, y; + __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} +}; +struct longlong4 { + long long x, y, z, w; + __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct ulonglong1 { + unsigned long long x; + __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {} +}; +struct ulonglong2 { + unsigned long long x, y; + __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} +}; +struct ulonglong4 { + unsigned long long x, y, z, w; + __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct float1 { + float x; + __host__ __device__ float1(float x = 0) : x(x) {} +}; +struct float2 { + float x, y; + __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} +}; +struct float4 { + float x, y, z, w; + __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} +}; + +struct double1 { + double x; + __host__ __device__ double1(double x = 0) : x(x) {} +}; +struct double2 { + double x, y; + __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} +}; +struct double4 { + double x, y, z, w; + __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} +}; + +typedef unsigned long long cudaTextureObject_t; +typedef unsigned long long cudaSurfaceObject_t; + +enum cudaTextureReadMode { + cudaReadModeNormalizedFloat, + cudaReadModeElementType +}; + +enum cudaSurfaceBoundaryMode { + cudaBoundaryModeZero, + cudaBoundaryModeClamp, + cudaBoundaryModeTrap +}; + +enum { + cudaTextureType1D, + cudaTextureType2D, + cudaTextureType3D, + cudaTextureTypeCubemap, + cudaTextureType1DLayered, + cudaTextureType2DLayered, + cudaTextureTypeCubemapLayered +}; + +struct textureReference { }; +template <class T, int texType = cudaTextureType1D, + enum cudaTextureReadMode mode = cudaReadModeElementType> +struct __attribute__((device_builtin_texture_type)) texture + : public textureReference {}; + +struct surfaceReference { int desc; }; + +template <typename T, int dim = 1> +struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {}; diff --git a/clang/test/CodeGenCUDA/correctly-rounded-div.cu b/clang/test/CodeGenCUDA/correctly-rounded-div.cu index abc130e8a4ee4..9451922060597 100644 --- a/clang/test/CodeGenCUDA/correctly-rounded-div.cu +++ b/clang/test/CodeGenCUDA/correctly-rounded-div.cu @@ -8,7 +8,7 @@ #include "Inputs/cuda.h" -typedef __attribute__(( ext_vector_type(4) )) float float4; +typedef __attribute__(( ext_vector_type(4) )) float floatvec4; // COMMON-LABEL: @_Z11spscalardiv // COMMON: fdiv{{.*}}, @@ -22,7 +22,7 @@ __device__ float spscalardiv(float a, float b) { // COMMON: fdiv{{.*}}, // NCRDIV: !fpmath ![[MD]] // CRDIV-NOT: !fpmath -__device__ float4 spvectordiv(float4 a, float4 b) { +__device__ floatvec4 spvectordiv(floatvec4 a, floatvec4 b) { return a / b; } diff --git a/clang/test/CodeGenCUDA/nvptx-surface.cu b/clang/test/CodeGenCUDA/nvptx-surface.cu index f7822740a571c..56995f2c0da80 100644 --- a/clang/test/CodeGenCUDA/nvptx-surface.cu +++ b/clang/test/CodeGenCUDA/nvptx-surface.cu @@ -2,170 +2,6 @@ // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s #include "Inputs/cuda.h" -struct char1 { - char x; - __host__ __device__ char1(char x = 0) : x(x) {} -}; -struct char2 { - char x, y; - __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {} -}; -struct char4 { - char x, y, z, w; - __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct uchar1 { - unsigned char x; - __host__ __device__ uchar1(unsigned char x = 0) : x(x) {} -}; -struct uchar2 { - unsigned char x, y; - __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {} -}; -struct uchar4 { - unsigned char x, y, z, w; - __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct short1 { - short x; - __host__ __device__ short1(short x = 0) : x(x) {} -}; -struct short2 { - short x, y; - __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {} -}; -struct short4 { - short x, y, z, w; - __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct ushort1 { - unsigned short x; - __host__ __device__ ushort1(unsigned short x = 0) : x(x) {} -}; -struct ushort2 { - unsigned short x, y; - __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {} -}; -struct ushort4 { - unsigned short x, y, z, w; - __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct int1 { - int x; - __host__ __device__ int1(int x = 0) : x(x) {} -}; -struct int2 { - int x, y; - __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {} -}; -struct int4 { - int x, y, z, w; - __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct uint1 { - unsigned x; - __host__ __device__ uint1(unsigned x = 0) : x(x) {} -}; -struct uint2 { - unsigned x, y; - __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {} -}; -struct uint3 { - unsigned x, y, z; - __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {} -}; -struct uint4 { - unsigned x, y, z, w; - __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct longlong1 { - long long x; - __host__ __device__ longlong1(long long x = 0) : x(x) {} -}; -struct longlong2 { - long long x, y; - __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {} -}; -struct longlong4 { - long long x, y, z, w; - __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct ulonglong1 { - unsigned long long x; - __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {} -}; -struct ulonglong2 { - unsigned long long x, y; - __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {} -}; -struct ulonglong4 { - unsigned long long x, y, z, w; - __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct float1 { - float x; - __host__ __device__ float1(float x = 0) : x(x) {} -}; -struct float2 { - float x, y; - __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {} -}; -struct float4 { - float x, y, z, w; - __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {} -}; - -struct double1 { - double x; - __host__ __device__ double1(double x = 0) : x(x) {} -}; -struct double2 { - double x, y; - __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {} -}; -struct double4 { - double x, y, z, w; - __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {} -}; - -typedef unsigned long long cudaTextureObject_t; -typedef unsigned long long cudaSurfaceObject_t; - -enum cudaTextureReadMode { - cudaReadModeNormalizedFloat, - cudaReadModeElementType -}; - -enum cudaSurfaceBoundaryMode { - cudaBoundaryModeZero, - cudaBoundaryModeClamp, - cudaBoundaryModeTrap -}; - -enum { - cudaTextureType1D, - cudaTextureType2D, - cudaTextureType3D, - cudaTextureTypeCubemap, - cudaTextureType1DLayered, - cudaTextureType2DLayered, - cudaTextureTypeCubemapLayered -}; - -struct textureReference {}; -template <class T, int texType = cudaTextureType1D, - enum cudaTextureReadMode mode = cudaReadModeElementType> -struct __attribute__((device_builtin_texture_type)) texture - : public textureReference {}; - #include "__clang_cuda_texture_intrinsics.h" __device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int layer, int face, int layerface) { diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu index c053cf586f8f5..c22378105f71d 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -29,7 +29,7 @@ // CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading" // CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries" // CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" -// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries" +// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries" //. // HIP: @managed.managed = global i32 0, align 4 // HIP: @managed = externally_initialized global ptr null @@ -44,7 +44,7 @@ // HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading" // HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries" // HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" -// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries" +// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries" //. // CUDA-COFF: @managed = dso_local global i32 undef, align 4 // CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading" @@ -58,7 +58,7 @@ // CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading" // CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE" // CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" -// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE" +// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE" //. // HIP-COFF: @managed.managed = dso_local global i32 0, align 4 // HIP-COFF: @managed = dso_local externally_initialized global ptr null @@ -73,7 +73,7 @@ // HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading" // HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE" // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading" -// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "llvm_offload_entries$OE" +// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, i64 1, ptr null }, section "llvm_offload_entries$OE" //. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: @@ -139,18 +139,6 @@ __device__ __managed__ int managed = 0; // __global__ void kernel() { external = 1; } -struct surfaceReference { int desc; }; - -template <typename T, int dim = 1> -struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {}; - surface<void> surf; -struct textureReference { - int desc; -}; - -template <typename T, int dim = 1, int mode = 0> -struct __attribute__((device_builtin_texture_type)) texture : public textureReference {}; - -texture<void> tex; +texture<void, cudaTextureType2D> tex; diff --git a/clang/test/CodeGenCUDA/propagate-attributes.cu b/clang/test/CodeGenCUDA/propagate-attributes.cu index 5aee677800f2e..6dfd44487d1dc 100644 --- a/clang/test/CodeGenCUDA/propagate-attributes.cu +++ b/clang/test/CodeGenCUDA/propagate-attributes.cu @@ -25,6 +25,10 @@ // RUN: -fcuda-is-device -funsafe-math-optimizations -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST +#ifndef LIB +#include "Inputs/cuda.h" +#endif + // Wrap everything in extern "C" so we don't have to worry about name mangling // in the IR. extern "C" { @@ -36,7 +40,6 @@ void lib_fn() {} #else -#include "Inputs/cuda.h" __device__ void lib_fn(); __global__ void kernel() { lib_fn(); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits