https://github.com/AustinSchuh updated 
https://github.com/llvm/llvm-project/pull/134758

>From 1e6367407a4b23b2a85f088ba4b66a0c0afc8faa Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.li...@gmail.com>
Date: Mon, 7 Apr 2025 17:18:38 -0700
Subject: [PATCH 1/2] cuda clang: Move nvptx-surface.cu test to CodeGenCUDA

Signed-off-by: Austin Schuh <austin.li...@gmail.com>
---
 clang/test/CodeGen/Inputs/cuda.h              | 194 ------------------
 .../{CodeGen => CodeGenCUDA}/nvptx-surface.cu | 164 +++++++++++++++
 2 files changed, 164 insertions(+), 194 deletions(-)
 delete mode 100644 clang/test/CodeGen/Inputs/cuda.h
 rename clang/test/{CodeGen => CodeGenCUDA}/nvptx-surface.cu (98%)

diff --git a/clang/test/CodeGen/Inputs/cuda.h b/clang/test/CodeGen/Inputs/cuda.h
deleted file mode 100644
index 58202442e1f8c..0000000000000
--- a/clang/test/CodeGen/Inputs/cuda.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/* Minimal declarations for CUDA support.  Testing purposes only.
- * This should stay in sync with clang/test/Headers/Inputs/include/cuda.h
- */
-#pragma once
-
-// Make this file work with nvcc, for testing compatibility.
-
-#ifndef __NVCC__
-#define __constant__ __attribute__((constant))
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-#define __host__ __attribute__((host))
-#define __shared__ __attribute__((shared))
-#define __managed__ __attribute__((managed))
-#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
-
-struct dim3 {
-  unsigned x, y, z;
-  __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), 
y(y), z(z) {}
-};
-
-// Host- and device-side placement new overloads.
-void *operator new(__SIZE_TYPE__, void *p) { return p; }
-void *operator new[](__SIZE_TYPE__, void *p) { return p; }
-__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
-__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
-
-#define CUDA_VERSION 10100
-
-struct char1 {
-  char x;
-  __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
-  char x, y;
-  __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
-  char x, y, z, w;
-  __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : 
x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
-  unsigned char x;
-  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
-  unsigned char x, y;
-  __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), 
y(y) {}
-};
-struct uchar4 {
-  unsigned char x, y, z, w;
-  __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, 
unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
-  short x;
-  __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
-  short x, y;
-  __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
-  short x, y, z, w;
-  __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 
0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
-  unsigned short x;
-  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
-  unsigned short x, y;
-  __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : 
x(x), y(y) {}
-};
-struct ushort4 {
-  unsigned short x, y, z, w;
-  __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, 
unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
-  int x;
-  __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
-  int x, y;
-  __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
-  int x, y, z, w;
-  __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), 
y(y), z(z), w(w) {}
-};
-
-struct uint1 {
-  unsigned x;
-  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
-  unsigned x, y;
-  __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
-  unsigned x, y, z;
-  __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : 
x(x), y(y), z(z) {}
-};
-struct uint4 {
-  unsigned x, y, z, w;
-  __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, 
unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
-  long long x;
-  __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
-  long long x, y;
-  __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) 
{}
-};
-struct longlong4 {
-  long long x, y, z, w;
-  __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z 
= 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
-  unsigned long long x;
-  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
-  unsigned long long x, y;
-  __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long 
y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
-  unsigned long long x, y, z, w;
-  __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long 
y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), 
w(w) {}
-};
-
-struct float1 {
-  float x;
-  __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
-  float x, y;
-  __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
-  float x, y, z, w;
-  __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 
0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
-  double x;
-  __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
-  double x, y;
-  __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
-  double x, y, z, w;
-  __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double 
w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
-  cudaReadModeNormalizedFloat,
-  cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
-  cudaBoundaryModeZero,
-  cudaBoundaryModeClamp,
-  cudaBoundaryModeTrap
-};
-
-enum {
-  cudaTextureType1D,
-  cudaTextureType2D,
-  cudaTextureType3D,
-  cudaTextureTypeCubemap,
-  cudaTextureType1DLayered,
-  cudaTextureType2DLayered,
-  cudaTextureTypeCubemapLayered
-};
-
-struct textureReference {};
-template <class T, int texType = cudaTextureType1D,
-          enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
-    : public textureReference {};
-
-#endif // !__NVCC__
diff --git a/clang/test/CodeGen/nvptx-surface.cu 
b/clang/test/CodeGenCUDA/nvptx-surface.cu
similarity index 98%
rename from clang/test/CodeGen/nvptx-surface.cu
rename to clang/test/CodeGenCUDA/nvptx-surface.cu
index 56995f2c0da80..f7822740a571c 100644
--- a/clang/test/CodeGen/nvptx-surface.cu
+++ b/clang/test/CodeGenCUDA/nvptx-surface.cu
@@ -2,6 +2,170 @@
 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - 
%s -emit-llvm | FileCheck %s
 #include "Inputs/cuda.h"
 
+struct char1 {
+  char x;
+  __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+  char x, y;
+  __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+  char x, y, z, w;
+  __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : 
x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+  unsigned char x;
+  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+  unsigned char x, y;
+  __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), 
y(y) {}
+};
+struct uchar4 {
+  unsigned char x, y, z, w;
+  __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, 
unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+  short x;
+  __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+  short x, y;
+  __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+  short x, y, z, w;
+  __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 
0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+  unsigned short x;
+  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+  unsigned short x, y;
+  __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : 
x(x), y(y) {}
+};
+struct ushort4 {
+  unsigned short x, y, z, w;
+  __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, 
unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+  int x;
+  __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+  int x, y;
+  __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+  int x, y, z, w;
+  __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), 
y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+  unsigned x;
+  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+  unsigned x, y;
+  __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+  unsigned x, y, z;
+  __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : 
x(x), y(y), z(z) {}
+};
+struct uint4 {
+  unsigned x, y, z, w;
+  __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, 
unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+  long long x;
+  __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+  long long x, y;
+  __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) 
{}
+};
+struct longlong4 {
+  long long x, y, z, w;
+  __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z 
= 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+  unsigned long long x;
+  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+  unsigned long long x, y;
+  __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long 
y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+  unsigned long long x, y, z, w;
+  __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long 
y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), 
w(w) {}
+};
+
+struct float1 {
+  float x;
+  __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+  float x, y;
+  __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+  float x, y, z, w;
+  __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 
0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+  double x;
+  __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+  double x, y;
+  __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+  double x, y, z, w;
+  __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double 
w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+  cudaReadModeNormalizedFloat,
+  cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+  cudaBoundaryModeZero,
+  cudaBoundaryModeClamp,
+  cudaBoundaryModeTrap
+};
+
+enum {
+  cudaTextureType1D,
+  cudaTextureType2D,
+  cudaTextureType3D,
+  cudaTextureTypeCubemap,
+  cudaTextureType1DLayered,
+  cudaTextureType2DLayered,
+  cudaTextureTypeCubemapLayered
+};
+
+struct textureReference {};
+template <class T, int texType = cudaTextureType1D,
+          enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+    : public textureReference {};
+
 #include "__clang_cuda_texture_intrinsics.h"
 
 __device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int 
layer, int face, int layerface) {

>From 522cd517577a46e1d491cb7400169e34b92e4a00 Mon Sep 17 00:00:00 2001
From: Austin Schuh <austin.li...@gmail.com>
Date: Tue, 8 Apr 2025 14:46:17 -0700
Subject: [PATCH 2/2] Move code to cuda.h, and fix issues that causes

---
 clang/test/CodeGenCUDA/Inputs/cuda.h          | 169 ++++++++++++++++++
 .../test/CodeGenCUDA/correctly-rounded-div.cu |   4 +-
 clang/test/CodeGenCUDA/nvptx-surface.cu       | 164 -----------------
 clang/test/CodeGenCUDA/offloading-entries.cu  |  22 +--
 .../test/CodeGenCUDA/propagate-attributes.cu  |   5 +-
 5 files changed, 180 insertions(+), 184 deletions(-)

diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h 
b/clang/test/CodeGenCUDA/Inputs/cuda.h
index a8d85afb7cd21..dc85eae0c5178 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -72,3 +72,172 @@ extern "C" cudaError_t cudaLaunchKernel_ptsz(const void 
*func, dim3 gridDim,
 #endif
 
 extern "C" __device__ int printf(const char*, ...);
+
+struct char1 {
+  char x;
+  __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+  char x, y;
+  __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+  char x, y, z, w;
+  __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : 
x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+  unsigned char x;
+  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+  unsigned char x, y;
+  __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), 
y(y) {}
+};
+struct uchar4 {
+  unsigned char x, y, z, w;
+  __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, 
unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+  short x;
+  __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+  short x, y;
+  __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+  short x, y, z, w;
+  __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 
0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+  unsigned short x;
+  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+  unsigned short x, y;
+  __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : 
x(x), y(y) {}
+};
+struct ushort4 {
+  unsigned short x, y, z, w;
+  __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, 
unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+  int x;
+  __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+  int x, y;
+  __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+  int x, y, z, w;
+  __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), 
y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+  unsigned x;
+  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+  unsigned x, y;
+  __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+  unsigned x, y, z;
+  __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : 
x(x), y(y), z(z) {}
+};
+struct uint4 {
+  unsigned x, y, z, w;
+  __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, 
unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+  long long x;
+  __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+  long long x, y;
+  __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) 
{}
+};
+struct longlong4 {
+  long long x, y, z, w;
+  __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z 
= 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+  unsigned long long x;
+  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+  unsigned long long x, y;
+  __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long 
y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+  unsigned long long x, y, z, w;
+  __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long 
y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), 
w(w) {}
+};
+
+struct float1 {
+  float x;
+  __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+  float x, y;
+  __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+  float x, y, z, w;
+  __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 
0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+  double x;
+  __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+  double x, y;
+  __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+  double x, y, z, w;
+  __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double 
w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+  cudaReadModeNormalizedFloat,
+  cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+  cudaBoundaryModeZero,
+  cudaBoundaryModeClamp,
+  cudaBoundaryModeTrap
+};
+
+enum {
+  cudaTextureType1D,
+  cudaTextureType2D,
+  cudaTextureType3D,
+  cudaTextureTypeCubemap,
+  cudaTextureType1DLayered,
+  cudaTextureType2DLayered,
+  cudaTextureTypeCubemapLayered
+};
+
+struct textureReference { };
+template <class T, int texType = cudaTextureType1D,
+          enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+    : public textureReference {};
+
+struct surfaceReference { int desc; };
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public 
surfaceReference {};
diff --git a/clang/test/CodeGenCUDA/correctly-rounded-div.cu 
b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
index abc130e8a4ee4..9451922060597 100644
--- a/clang/test/CodeGenCUDA/correctly-rounded-div.cu
+++ b/clang/test/CodeGenCUDA/correctly-rounded-div.cu
@@ -8,7 +8,7 @@
 
 #include "Inputs/cuda.h"
 
-typedef __attribute__(( ext_vector_type(4) )) float float4;
+typedef __attribute__(( ext_vector_type(4) )) float floatvec4;
 
 // COMMON-LABEL: @_Z11spscalardiv
 // COMMON: fdiv{{.*}},
@@ -22,7 +22,7 @@ __device__ float spscalardiv(float a, float b) {
 // COMMON: fdiv{{.*}},
 // NCRDIV: !fpmath ![[MD]]
 // CRDIV-NOT: !fpmath
-__device__ float4 spvectordiv(float4 a, float4 b) {
+__device__ floatvec4 spvectordiv(floatvec4 a, floatvec4 b) {
   return a / b;
 }
 
diff --git a/clang/test/CodeGenCUDA/nvptx-surface.cu 
b/clang/test/CodeGenCUDA/nvptx-surface.cu
index f7822740a571c..56995f2c0da80 100644
--- a/clang/test/CodeGenCUDA/nvptx-surface.cu
+++ b/clang/test/CodeGenCUDA/nvptx-surface.cu
@@ -2,170 +2,6 @@
 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - 
%s -emit-llvm | FileCheck %s
 #include "Inputs/cuda.h"
 
-struct char1 {
-  char x;
-  __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
-  char x, y;
-  __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
-  char x, y, z, w;
-  __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : 
x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
-  unsigned char x;
-  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
-  unsigned char x, y;
-  __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), 
y(y) {}
-};
-struct uchar4 {
-  unsigned char x, y, z, w;
-  __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, 
unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
-  short x;
-  __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
-  short x, y;
-  __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
-  short x, y, z, w;
-  __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 
0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
-  unsigned short x;
-  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
-  unsigned short x, y;
-  __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : 
x(x), y(y) {}
-};
-struct ushort4 {
-  unsigned short x, y, z, w;
-  __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, 
unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
-  int x;
-  __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
-  int x, y;
-  __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
-  int x, y, z, w;
-  __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), 
y(y), z(z), w(w) {}
-};
-
-struct uint1 {
-  unsigned x;
-  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
-  unsigned x, y;
-  __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
-  unsigned x, y, z;
-  __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : 
x(x), y(y), z(z) {}
-};
-struct uint4 {
-  unsigned x, y, z, w;
-  __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, 
unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
-  long long x;
-  __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
-  long long x, y;
-  __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) 
{}
-};
-struct longlong4 {
-  long long x, y, z, w;
-  __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z 
= 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
-  unsigned long long x;
-  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
-  unsigned long long x, y;
-  __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long 
y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
-  unsigned long long x, y, z, w;
-  __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long 
y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), 
w(w) {}
-};
-
-struct float1 {
-  float x;
-  __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
-  float x, y;
-  __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
-  float x, y, z, w;
-  __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 
0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
-  double x;
-  __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
-  double x, y;
-  __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
-  double x, y, z, w;
-  __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double 
w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
-  cudaReadModeNormalizedFloat,
-  cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
-  cudaBoundaryModeZero,
-  cudaBoundaryModeClamp,
-  cudaBoundaryModeTrap
-};
-
-enum {
-  cudaTextureType1D,
-  cudaTextureType2D,
-  cudaTextureType3D,
-  cudaTextureTypeCubemap,
-  cudaTextureType1DLayered,
-  cudaTextureType2DLayered,
-  cudaTextureTypeCubemapLayered
-};
-
-struct textureReference {};
-template <class T, int texType = cudaTextureType1D,
-          enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
-    : public textureReference {};
-
 #include "__clang_cuda_texture_intrinsics.h"
 
 __device__ void surfchar(cudaSurfaceObject_t surf, int x, int y, int z, int 
layer, int face, int layerface) {
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index c053cf586f8f5..c22378105f71d 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -29,7 +29,7 @@
 // CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] 
c"surf\00", section ".llvm.rodata.offloading"
 // CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, 
i64 1, ptr null }, section "llvm_offload_entries"
 // CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] 
c"tex\00", section ".llvm.rodata.offloading"
-// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 
1, ptr null }, section "llvm_offload_entries"
+// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, 
i64 1, ptr null }, section "llvm_offload_entries"
 //.
 // HIP: @managed.managed = global i32 0, align 4
 // HIP: @managed = externally_initialized global ptr null
@@ -44,7 +44,7 @@
 // HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] 
c"surf\00", section ".llvm.rodata.offloading"
 // HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, 
i64 1, ptr null }, section "llvm_offload_entries"
 // HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] 
c"tex\00", section ".llvm.rodata.offloading"
-// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 
1, ptr null }, section "llvm_offload_entries"
+// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { 
i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr @.offloading.entry_name.5, i64 1, 
i64 1, ptr null }, section "llvm_offload_entries"
 //.
 // CUDA-COFF: @managed = dso_local global i32 undef, align 4
 // CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x 
i8] c"_Z3foov\00", section ".llvm.rodata.offloading"
@@ -58,7 +58,7 @@
 // CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x 
i8] c"surf\00", section ".llvm.rodata.offloading"
 // CUDA-COFF: @.offloading.entry.surf = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr 
@.offloading.entry_name.4, i64 4, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
 // CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x 
i8] c"tex\00", section ".llvm.rodata.offloading"
-// CUDA-COFF: @.offloading.entry.tex = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr 
@.offloading.entry_name.5, i64 4, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
+// CUDA-COFF: @.offloading.entry.tex = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 35, ptr @tex, ptr 
@.offloading.entry_name.5, i64 1, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
 //.
 // HIP-COFF: @managed.managed = dso_local global i32 0, align 4
 // HIP-COFF: @managed = dso_local externally_initialized global ptr null
@@ -73,7 +73,7 @@
 // HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x 
i8] c"surf\00", section ".llvm.rodata.offloading"
 // HIP-COFF: @.offloading.entry.surf = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr 
@.offloading.entry_name.4, i64 4, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
 // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x 
i8] c"tex\00", section ".llvm.rodata.offloading"
-// HIP-COFF: @.offloading.entry.tex = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr 
@.offloading.entry_name.5, i64 4, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
+// HIP-COFF: @.offloading.entry.tex = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 35, ptr @tex, ptr 
@.offloading.entry_name.5, i64 1, i64 1, ptr null }, section 
"llvm_offload_entries$OE"
 //.
 // CUDA-LABEL: @_Z18__device_stub__foov(
 // CUDA-NEXT:  entry:
@@ -139,18 +139,6 @@ __device__ __managed__ int managed = 0;
 //
 __global__ void kernel() { external = 1; }
 
-struct surfaceReference { int desc; };
-
-template <typename T, int dim = 1>
-struct __attribute__((device_builtin_surface_type)) surface : public 
surfaceReference {};
-
 surface<void> surf;
 
-struct textureReference {
-  int desc;
-};
-
-template <typename T, int dim = 1, int mode = 0>
-struct __attribute__((device_builtin_texture_type)) texture : public 
textureReference {};
-
-texture<void> tex;
+texture<void, cudaTextureType2D> tex;
diff --git a/clang/test/CodeGenCUDA/propagate-attributes.cu 
b/clang/test/CodeGenCUDA/propagate-attributes.cu
index 5aee677800f2e..6dfd44487d1dc 100644
--- a/clang/test/CodeGenCUDA/propagate-attributes.cu
+++ b/clang/test/CodeGenCUDA/propagate-attributes.cu
@@ -25,6 +25,10 @@
 // RUN:   -fcuda-is-device -funsafe-math-optimizations -triple 
nvptx-unknown-unknown \
 // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
 
+#ifndef LIB
+#include "Inputs/cuda.h"
+#endif
+
 // Wrap everything in extern "C" so we don't have to worry about name mangling
 // in the IR.
 extern "C" {
@@ -36,7 +40,6 @@ void lib_fn() {}
 
 #else
 
-#include "Inputs/cuda.h"
 __device__ void lib_fn();
 __global__ void kernel() { lib_fn(); }
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to