yaxunl updated this revision to Diff 90243.
yaxunl edited the summary of this revision.
https://reviews.llvm.org/D27627
Files:
include/clang/AST/ASTContext.h
include/clang/Basic/TargetInfo.h
lib/AST/ASTContext.cpp
lib/Basic/TargetInfo.cpp
lib/Basic/Targets.cpp
lib/CodeGen/CGBuiltin.cpp
lib/CodeGen/CGCall.cpp
lib/CodeGen/CGClass.cpp
lib/CodeGen/CGDecl.cpp
lib/CodeGen/CGDeclCXX.cpp
lib/CodeGen/CGException.cpp
lib/CodeGen/CGExpr.cpp
lib/CodeGen/CGExprCXX.cpp
lib/CodeGen/CGExprConstant.cpp
lib/CodeGen/CGExprScalar.cpp
lib/CodeGen/CGGPUBuiltin.cpp
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGVTT.cpp
lib/CodeGen/CGVTables.cpp
lib/CodeGen/CodeGenFunction.cpp
lib/CodeGen/CodeGenFunction.h
lib/CodeGen/CodeGenModule.cpp
lib/CodeGen/CodeGenTypes.cpp
lib/CodeGen/CodeGenTypes.h
lib/CodeGen/ItaniumCXXABI.cpp
test/CodeGenCUDA/address-spaces.cu
test/CodeGenCUDA/convergent.cu
test/CodeGenCUDA/device-var-init.cu
test/CodeGenCUDA/device-vtable.cu
test/CodeGenCUDA/filter-decl.cu
test/CodeGenCUDA/function-overload.cu
test/CodeGenCUDA/kernel-args-alignment.cu
test/CodeGenCUDA/llvm-used.cu
test/CodeGenCUDA/printf.cu
test/CodeGenCXX/amdgcn-global-init.cpp
test/OpenMP/nvptx_parallel_codegen.cpp
Index: test/OpenMP/nvptx_parallel_codegen.cpp
===================================================================
--- test/OpenMP/nvptx_parallel_codegen.cpp
+++ test/OpenMP/nvptx_parallel_codegen.cpp
@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=amdgcn -emit-llvm-bc %s -o %t-x86-host-amd.bc
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
@@ -62,14 +63,14 @@
return a;
}
- // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l17}}_worker()
+ // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l18}}_worker()
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -122,7 +123,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]]
+ // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]]
// Create local storage for each capture.
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]],
// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
@@ -194,7 +195,7 @@
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l43}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l44}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -238,7 +239,7 @@
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l43]](i[[SZ:32|64]]
+ // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l44]](i[[SZ:32|64]]
// Create local storage for each capture.
// CHECK: [[LOCAL_N:%.+]] = alloca i[[SZ]],
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]],
Index: test/CodeGenCXX/amdgcn-global-init.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-global-init.cpp
@@ -0,0 +1,211 @@
+// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm -fexceptions %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck -check-prefix CHECK-NOEXC %s
+// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm \
+// RUN: -momit-leaf-frame-pointer -mdisable-fp-elim %s -o - \
+// RUN: | FileCheck -check-prefix CHECK-FP %s
+
+struct A {
+ A();
+ ~A();
+};
+
+struct B { B(); ~B(); };
+
+struct C { void *field; };
+
+struct D { ~D(); };
+
+// CHECK: @__dso_handle = external hidden addrspace(1) global i8
+// CHECK: @c = addrspace(1) global %struct.C zeroinitializer, align 8
+
+// PR6205: The casts should not require global initializers
+// CHECK: @_ZN6PR59741cE = external addrspace(1) global %"struct.PR5974::C"
+// CHECK: @_ZN6PR59741aE = addrspace(1) global %"struct.PR5974::A" addrspace(4)* addrspacecast (%"struct.PR5974::A" addrspace(1)* getelementptr inbounds (%"struct.PR5974::C", %"struct.PR5974::C" addrspace(1)* @_ZN6PR59741cE, i32 0, i32 0) to %"struct.PR5974::A" addrspace(4)*), align 8
+// CHECK: @_ZN6PR59741bE = addrspace(1) global %"struct.PR5974::B" addrspace(4)* bitcast (i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast (%"struct.PR5974::C" addrspace(1)* @_ZN6PR59741cE to i8 addrspace(1)*) to i8 addrspace(4)*), i64 4) to %"struct.PR5974::B" addrspace(4)*), align 8
+
+// CHECK: call void @_ZN1AC1Ev(%struct.A addrspace(4)* addrspacecast (%struct.A addrspace(1)* @a to %struct.A addrspace(4)*))
+// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.A addrspace(4)*)* @_ZN1AD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.A, %struct.A addrspace(1)* @a, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*))
+A a;
+
+// CHECK: call void @_ZN1BC1Ev(%struct.B addrspace(4)* addrspacecast (%struct.B addrspace(1)* @b to %struct.B addrspace(4)*))
+// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.B addrspace(4)*)* @_ZN1BD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.B, %struct.B addrspace(1)* @b, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*))
+B b;
+
+// PR6205: this should not require a global initializer
+// CHECK-NOT: call void @_ZN1CC1Ev
+C c;
+
+// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.D addrspace(4)*)* @_ZN1DD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.D, %struct.D addrspace(1)* @d, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*))
+D d;
+
+// <rdar://problem/7458115>
+namespace test1 {
+ int f();
+ const int x = f(); // This has side-effects and gets emitted immediately.
+ const int y = x - 1; // This gets deferred.
+ const int z = ~y; // This also gets deferred, but gets "undeferred" before y.
+ int test() { return z; }
+// CHECK-LABEL: define i32 @_ZN5test14testEv()
+
+ // All of these initializers end up delayed, so we check them later.
+}
+
+// <rdar://problem/8246444>
+namespace test2 {
+ struct allocator { allocator(); ~allocator(); };
+ struct A { A(const allocator &a = allocator()); ~A(); };
+
+ A a;
+// CHECK: call void @_ZN5test29allocatorC1Ev(
+// CHECK: invoke void @_ZN5test21AC1ERKNS_9allocatorE(
+// CHECK: call void @_ZN5test29allocatorD1Ev(
+// CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test21AD1Ev {{.*}} @_ZN5test21aE
+}
+
+namespace test3 {
+ // Tested at the beginning of the file.
+ const char * const var = "string";
+ extern const char * const var;
+
+ const char *test() { return var; }
+}
+
+namespace test4 {
+ struct A {
+ A();
+ };
+ extern int foo();
+
+ // This needs an initialization function and guard variables.
+ // CHECK: load i8, i8 addrspace(1)* bitcast (i64 addrspace(1)* @_ZGVN5test41xE to i8 addrspace(1)*)
+ // CHECK: [[CALL:%.*]] = call i32 @_ZN5test43fooEv
+ // CHECK-NEXT: store i32 [[CALL]], i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZN5test41xE to i32 addrspace(4)*)
+ // CHECK-NEXT: store i64 1, i64 addrspace(1)* @_ZGVN5test41xE
+ __attribute__((weak)) int x = foo();
+}
+
+namespace PR5974 {
+ struct A { int a; };
+ struct B { int b; };
+ struct C : A, B { int c; };
+
+ extern C c;
+
+ // These should not require global initializers.
+ A* a = &c;
+ B* b = &c;
+}
+
+// PR9570: the indirect field shouldn't crash IR gen.
+namespace test5 {
+ static union {
+ unsigned bar[4096] __attribute__((aligned(128)));
+ };
+}
+
+namespace std { struct type_info; }
+
+namespace test6 {
+ struct A { virtual ~A(); };
+ struct B : A {};
+ extern A *p;
+
+ // We must emit a dynamic initializer for 'q', because it could throw.
+ B *const q = &dynamic_cast<B&>(*p);
+ // CHECK: call void @__cxa_bad_cast()
+ // CHECK: store {{.*}} @_ZN5test6L1qE
+
+ // We don't need to emit 'r' at all, because it has internal linkage, is
+ // unused, and its initialization has no side-effects.
+ B *const r = dynamic_cast<B*>(p);
+ // CHECK-NOT: call void @__cxa_bad_cast()
+ // CHECK-NOT: store {{.*}} @_ZN5test6L1rE
+
+ // This can throw, so we need to emit it.
+ const std::type_info *const s = &typeid(*p);
+ // CHECK: store {{.*}} @_ZN5test6L1sE
+
+ // This can't throw, so we don't.
+ const std::type_info *const t = &typeid(p);
+ // CHECK-NOT: @_ZN5test6L1tE
+
+ extern B *volatile v;
+ // CHECK: store {{.*}} @_ZN5test6L1wE
+ B *const w = dynamic_cast<B*>(v);
+
+ // CHECK: load volatile
+ // CHECK: store {{.*}} @_ZN5test6L1xE
+ const int x = *(volatile int*)0x1234;
+
+ namespace {
+ int a = int();
+ volatile int b = int();
+ int c = a;
+ int d = b;
+ // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1aE
+ // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1bE
+ // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1cE
+ // CHECK: load volatile {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1bE
+ // CHECK: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1dE
+ }
+}
+
+namespace test7 {
+ struct A { A(); };
+ struct B { ~B(); int n; };
+ struct C { C() = default; C(const C&); int n; };
+ struct D {};
+
+ // CHECK: call void @_ZN5test71AC1Ev({{.*}}@_ZN5test7L1aE
+ const A a = A();
+
+ // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test71BD1Ev{{.*}} @_ZN5test7L2b1E
+ // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test71BD1Ev{{.*}} @_ZGRN5test72b2E
+ // CHECK: call void @_ZN5test71BD1Ev(
+ // CHECK: store {{.*}} @_ZN5test7L2b3E
+ const B b1 = B();
+ const B &b2 = B();
+ const int b3 = B().n;
+
+ // CHECK-NOT: @_ZN5test7L2c1E
+ // CHECK: call void @llvm.memset{{.*}} @_ZN5test7L2c1E
+ // CHECK-NOT: @_ZN5test7L2c1E
+ // CHECK: @_ZN5test7L2c2E
+ // CHECK-NOT: @_ZN5test7L2c3E
+ // CHECK: @_ZN5test7L2c4E
+ const C c1 = C();
+ const C c2 = static_cast<const C&>(C());
+ const int c3 = C().n;
+ const int c4 = C(C()).n;
+
+ // CHECK-NOT: @_ZN5test7L1dE
+ const D d = D();
+
+ // CHECK: store {{.*}} @_ZN5test71eE
+ int f(), e = f();
+}
+
+
+// At the end of the file, we check that y is initialized before z.
+
+// CHECK: define internal void [[TEST1_Z_INIT:@.*]]()
+// CHECK: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1yE to i32 addrspace(4)*)
+// CHECK-NEXT: xor
+// CHECK-NEXT: store i32 {{.*}}, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1zE to i32 addrspace(4)*)
+// CHECK: define internal void [[TEST1_Y_INIT:@.*]]()
+// CHECK: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1xE to i32 addrspace(4)*)
+// CHECK-NEXT: sub
+// CHECK-NEXT: store i32 {{.*}}, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1yE to i32 addrspace(4)*)
+
+// CHECK: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() #{{[0-9]+}}
+// CHECK: call void [[TEST1_Y_INIT]]
+// CHECK: call void [[TEST1_Z_INIT]]
+
+// rdar://problem/8090834: this should be nounwind
+// CHECK-NOEXC: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() [[NUW:#[0-9]+]]
+
+// CHECK-NOEXC: attributes [[NUW]] = { noinline nounwind{{.*}} }
+
+// PR21811: attach the appropriate attribute to the global init function
+// CHECK-FP: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() [[NUX:#[0-9]+]]
+// CHECK-FP: attributes [[NUX]] = { noinline nounwind {{.*}}"no-frame-pointer-elim-non-leaf"{{.*}} }
Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -2,38 +2,46 @@
// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \
-// RUN: -o - %s | FileCheck %s
+// RUN: -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm \
+// RUN: -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
#include "Inputs/cuda.h"
extern "C" __device__ int vprintf(const char*, const char*);
// Check a simple call to printf end-to-end.
// CHECK: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double }
+// CHECK-LABEL: define i32 @_Z11CheckSimplev()
__device__ int CheckSimple() {
- // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
+ // NVPTX: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
+ // AMDGCN: [[ALLOCA:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
+ // AMDGCN: [[BUF:%[a-zA-Z0-9_]+]] = addrspacecast %printf_args* [[ALLOCA]] to %printf_args addrspace(4)*
// CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
const char* fmt = "%d %lld %f";
- // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0
- // CHECK: store i32 1, i32* [[PTR0]], align 4
- // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1
- // CHECK: store i64 2, i64* [[PTR1]], align 8
- // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2
- // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8
- // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8*
- // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]])
+ // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], i32 0, i32 0
+ // CHECK: store i32 1, i32{{.*}}* [[PTR0]], align 4
+ // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], i32 0, i32 1
+ // CHECK: store i64 2, i64{{.*}}* [[PTR1]], align 8
+ // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], i32 0, i32 2
+ // CHECK: store double 3.0{{[^,]*}}, double{{.*}}* [[PTR2]], align 8
+ // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]] to i8{{.*}}*
+ // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8{{.*}}* [[FMT]], i8{{.*}}* [[BUF_CAST]])
// CHECK: ret i32 [[RET]]
return printf(fmt, 1, 2ll, 3.0);
}
+// CHECK-LABEL: define void @_Z11CheckNoArgsv()
__device__ void CheckNoArgs() {
- // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
+ // CHECK: call i32 @vprintf({{.*}}, i8{{.*}}* null){{$}}
printf("hello, world!");
}
// Check that printf's alloca happens in the entry block, not inside the if
// statement.
__device__ bool foo();
+// CHECK-LABEL: define void @_Z25CheckAllocaIsInEntryBlockv()
__device__ void CheckAllocaIsInEntryBlock() {
// CHECK: alloca %printf_args
// CHECK: call {{.*}} @_Z3foov()
Index: test/CodeGenCUDA/llvm-used.cu
===================================================================
--- test/CodeGenCUDA/llvm-used.cu
+++ test/CodeGenCUDA/llvm-used.cu
@@ -1,8 +1,10 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck -check-prefix=NVPTX %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn-amd-amdhsa | FileCheck -check-prefix=AMDGCN %s
// Make sure we emit the proper addrspacecast for llvm.used. PR22383 exposed an
// issue where we were generating a bitcast instead of an addrspacecast.
-// CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
+// NVPTX: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
+// AMDGCN: @llvm.used = appending global [1 x i8 addrspace(4)*] [i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8 addrspace(4)*)], section "llvm.metadata"
__attribute__((device)) __attribute__((__used__)) int a[] = {};
Index: test/CodeGenCUDA/kernel-args-alignment.cu
===================================================================
--- test/CodeGenCUDA/kernel-args-alignment.cu
+++ test/CodeGenCUDA/kernel-args-alignment.cu
@@ -1,8 +1,11 @@
// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
-// RUN: FileCheck -check-prefix HOST -check-prefix CHECK %s
+// RUN: FileCheck -check-prefixes=HOST,CHECK %s
// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
-// RUN: -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s
+// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK,NVPTX %s
+
+// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple amdgcn-amd-amdhsa \
+// RUN:   -emit-llvm -o - %s -DAMDGCN | FileCheck -check-prefixes=DEVICE,CHECK,AMDGCN %s
#include "Inputs/cuda.h"
@@ -18,7 +21,9 @@
// Clang should generate a packed LLVM struct for S (denoted by the <>s),
// otherwise this test isn't interesting.
-// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }>
+// HOST: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }>
+// NVPTX: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }>
+// AMDGCN: %struct.S = type <{ i32 addrspace(4)*, i8, %struct.U, [5 x i8] }>
static_assert(alignof(S) == 8, "Unexpected alignment.");
@@ -32,5 +37,6 @@
// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
// DEVICE-LABEL: @_Z6kernelc1SPi
-// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
+// NVPTX-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
+// AMDGCN-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32 addrspace(4)*
__global__ void kernel(char a, S s, int *b) {}
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- test/CodeGenCUDA/function-overload.cu
+++ test/CodeGenCUDA/function-overload.cu
@@ -8,6 +8,8 @@
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \
// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s
#include "Inputs/cuda.h"
Index: test/CodeGenCUDA/filter-decl.cu
===================================================================
--- test/CodeGenCUDA/filter-decl.cu
+++ test/CodeGenCUDA/filter-decl.cu
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-HOST %s
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefix=CHECK-DEVICE %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefixes=CHECK-DEVICE,ITANIUM %s
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefixes=CHECK-DEVICE,AMDGCN %s
#include "Inputs/cuda.h"
@@ -10,15 +11,18 @@
__asm__("file scope asm is host only");
// CHECK-HOST: constantdata = internal global
-// CHECK-DEVICE: constantdata = externally_initialized global
+// ITANIUM: constantdata = externally_initialized global
+// AMDGCN: constantdata = addrspace(2) externally_initialized global
__constant__ char constantdata[256];
// CHECK-HOST: devicedata = internal global
-// CHECK-DEVICE: devicedata = externally_initialized global
+// ITANIUM: devicedata = externally_initialized global
+// AMDGCN: devicedata = addrspace(1) externally_initialized global
__device__ char devicedata[256];
// CHECK-HOST: shareddata = internal global
-// CHECK-DEVICE: shareddata = global
+// ITANIUM: shareddata = global
+// AMDGCN: shareddata = addrspace(3) global
__shared__ char shareddata[256];
// CHECK-HOST: hostdata = global
Index: test/CodeGenCUDA/device-vtable.cu
===================================================================
--- test/CodeGenCUDA/device-vtable.cu
+++ test/CodeGenCUDA/device-vtable.cu
@@ -10,6 +10,8 @@
// RUN: | FileCheck %s -check-prefix=CHECK-HOST -check-prefix=CHECK-BOTH
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \
// RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \
+// RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH
#include "Inputs/cuda.h"
Index: test/CodeGenCUDA/device-var-init.cu
===================================================================
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -4,7 +4,10 @@
// variables, but accept empty constructors allowed by CUDA.
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCN %s
#ifdef __clang__
#include "Inputs/cuda.h"
@@ -18,7 +21,8 @@
__shared__ int s_v;
// CHECK: @s_v = addrspace(3) global i32 undef,
__constant__ int c_v;
-// CHECK: addrspace(4) externally_initialized global i32 0,
+// NVPTX: addrspace(4) externally_initialized global i32 0,
+// AMDGCN: addrspace(2) externally_initialized global i32 0,
__device__ int d_v_i = 1;
// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1,
@@ -29,81 +33,92 @@
__shared__ T s_t;
// CHECK: @s_t = addrspace(3) global %struct.T undef,
__constant__ T c_t;
-// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer,
+// NVPTX: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer,
+// AMDGCN: @c_t = addrspace(2) externally_initialized global %struct.T zeroinitializer,
__device__ T d_t_i = {2};
// CHECK: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 },
__constant__ T c_t_i = {2};
-// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 },
+// NVPTX: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 },
+// AMDGCN: @c_t_i = addrspace(2) externally_initialized global %struct.T { i32 2 },
// empty constructor
__device__ EC d_ec;
// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer,
__shared__ EC s_ec;
// CHECK: @s_ec = addrspace(3) global %struct.EC undef,
__constant__ EC c_ec;
-// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer,
+// NVPTX: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer,
+// AMDGCN: @c_ec = addrspace(2) externally_initialized global %struct.EC zeroinitializer,
// empty destructor
__device__ ED d_ed;
// CHECK: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer,
__shared__ ED s_ed;
// CHECK: @s_ed = addrspace(3) global %struct.ED undef,
__constant__ ED c_ed;
-// CHECK: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer,
+// NVPTX: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer,
+// AMDGCN: @c_ed = addrspace(2) externally_initialized global %struct.ED zeroinitializer,
__device__ ECD d_ecd;
// CHECK: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer,
__shared__ ECD s_ecd;
// CHECK: @s_ecd = addrspace(3) global %struct.ECD undef,
__constant__ ECD c_ecd;
-// CHECK: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
+// NVPTX: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
+// AMDGCN: @c_ecd = addrspace(2) externally_initialized global %struct.ECD zeroinitializer,
// empty templated constructor -- allowed with no arguments
__device__ ETC d_etc;
// CHECK: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer,
__shared__ ETC s_etc;
// CHECK: @s_etc = addrspace(3) global %struct.ETC undef,
__constant__ ETC c_etc;
-// CHECK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
+// NVPTX: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
+// AMDGCN: @c_etc = addrspace(2) externally_initialized global %struct.ETC zeroinitializer,
__device__ NCFS d_ncfs;
// CHECK: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
__constant__ NCFS c_ncfs;
-// CHECK: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
+// NVPTX: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
+// AMDGCN: @c_ncfs = addrspace(2) externally_initialized global %struct.NCFS { i32 3 }
// Regular base class -- allowed
__device__ T_B_T d_t_b_t;
// CHECK: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer,
__shared__ T_B_T s_t_b_t;
// CHECK: @s_t_b_t = addrspace(3) global %struct.T_B_T undef,
__constant__ T_B_T c_t_b_t;
-// CHECK: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
+// NVPTX: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
+// AMDGCN: @c_t_b_t = addrspace(2) externally_initialized global %struct.T_B_T zeroinitializer,
// Incapsulated object of allowed class -- allowed
__device__ T_F_T d_t_f_t;
// CHECK: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer,
__shared__ T_F_T s_t_f_t;
// CHECK: @s_t_f_t = addrspace(3) global %struct.T_F_T undef,
__constant__ T_F_T c_t_f_t;
-// CHECK: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
+// NVPTX: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
+// AMDGCN: @c_t_f_t = addrspace(2) externally_initialized global %struct.T_F_T zeroinitializer,
// array of allowed objects -- allowed
__device__ T_FA_T d_t_fa_t;
// CHECK: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer,
__shared__ T_FA_T s_t_fa_t;
// CHECK: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef,
__constant__ T_FA_T c_t_fa_t;
-// CHECK: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
+// NVPTX: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
+// AMDGCN: @c_t_fa_t = addrspace(2) externally_initialized global %struct.T_FA_T zeroinitializer,
// Calling empty base class initializer is OK
__device__ EC_I_EC d_ec_i_ec;
// CHECK: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer,
__shared__ EC_I_EC s_ec_i_ec;
// CHECK: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef,
__constant__ EC_I_EC c_ec_i_ec;
-// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// NVPTX: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// AMDGCN: @c_ec_i_ec = addrspace(2) externally_initialized global %struct.EC_I_EC zeroinitializer,
// We should not emit global initializers for device-side variables.
// CHECK-NOT: @__cxx_global_var_init
@@ -114,82 +129,111 @@
T t;
// CHECK-NOT: call
EC ec;
- // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %ec)
+ // NVPTX: call void @_ZN2ECC1Ev(%struct.EC* %ec)
+ // AMDGCN: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* %ec)
ED ed;
// CHECK-NOT: call
ECD ecd;
- // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+ // NVPTX: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+ // AMDGCN: call void @_ZN3ECDC1Ev(%struct.ECD addrspace(4)* %ecd)
ETC etc;
- // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+ // NVPTX: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+ // AMDGCN: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* %etc)
UC uc;
// undefined constructor -- not allowed
- // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc)
+ // NVPTX: call void @_ZN2UCC1Ev(%struct.UC* %uc)
+ // AMDGCN: call void @_ZN2UCC1Ev(%struct.UC addrspace(4)* %uc)
UD ud;
// undefined destructor -- not allowed
// CHECK-NOT: call
ECI eci;
// empty constructor w/ initializer list -- not allowed
- // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+ // NVPTX: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
+ // AMDGCN: call void @_ZN3ECIC1Ev(%struct.ECI addrspace(4)* %eci)
NEC nec;
// non-empty constructor -- not allowed
- // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+ // NVPTX: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
+ // AMDGCN: call void @_ZN3NECC1Ev(%struct.NEC addrspace(4)* %nec)
// non-empty destructor -- not allowed
NED ned;
// no-constructor, virtual method -- not allowed
- // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+ // NVPTX: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
+ // AMDGCN: call void @_ZN3NCVC1Ev(%struct.NCV addrspace(4)* %ncv)
NCV ncv;
// CHECK-NOT: call
VD vd;
- // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %vd)
+ // NVPTX: call void @_ZN2VDC1Ev(%struct.VD* %vd)
+ // AMDGCN: call void @_ZN2VDC1Ev(%struct.VD addrspace(4)* %vd)
NCF ncf;
- // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+ // NVPTX: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
+ // AMDGCN: call void @_ZN3NCFC1Ev(%struct.NCF addrspace(4)* %ncf)
NCFS ncfs;
- // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+ // NVPTX: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+ // AMDGCN: call void @_ZN4NCFSC1Ev(%struct.NCFS addrspace(4)* %ncfs)
UTC utc;
- // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+ // NVPTX: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
+ // AMDGCN: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC addrspace(4)* %utc)
NETC netc;
- // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+ // NVPTX: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
+ // AMDGCN: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC addrspace(4)* %netc)
T_B_T t_b_t;
// CHECK-NOT: call
T_F_T t_f_t;
// CHECK-NOT: call
T_FA_T t_fa_t;
// CHECK-NOT: call
EC_I_EC ec_i_ec;
- // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+ // NVPTX: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
+ // AMDGCN: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC addrspace(4)* %ec_i_ec)
EC_I_EC1 ec_i_ec1;
- // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+ // NVPTX: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
+ // AMDGCN: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1 addrspace(4)* %ec_i_ec1)
T_V_T t_v_t;
- // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+ // NVPTX: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+ // AMDGCN: call void @_ZN5T_V_TC1Ev(%struct.T_V_T addrspace(4)* %t_v_t)
T_B_NEC t_b_nec;
- // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+ // NVPTX: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
+ // AMDGCN: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC addrspace(4)* %t_b_nec)
T_F_NEC t_f_nec;
- // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+ // NVPTX: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
+ // AMDGCN: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC addrspace(4)* %t_f_nec)
T_FA_NEC t_fa_nec;
- // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+ // NVPTX: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
+ // AMDGCN: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC addrspace(4)* %t_fa_nec)
T_B_NED t_b_ned;
// CHECK-NOT: call
T_F_NED t_f_ned;
// CHECK-NOT: call
T_FA_NED t_fa_ned;
// CHECK-NOT: call
static __shared__ EC s_ec;
- // CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*))
+ // NVPTX-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*))
+ // AMDGCN-NOT: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC addrspace(4)*))
static __shared__ ETC s_etc;
- // CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*))
+ // NVPTX-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*))
+ // AMDGCN-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC addrspace(4)*))
// anchor point separating constructors and destructors
df(); // CHECK: call void @_Z2dfv()
// Verify that we only call non-empty destructors
- // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
- // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
- // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
- // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
- // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
- // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
- // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
- // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+ // NVPTX-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned)
+ // NVPTX-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned)
+ // NVPTX-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned)
+ // NVPTX-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd)
+ // NVPTX-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned)
+ // NVPTX-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud)
+ // NVPTX-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd)
+ // NVPTX-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed)
+
+ // AMDGCN-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED addrspace(4)* %t_fa_ned)
+ // AMDGCN-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED addrspace(4)* %t_f_ned)
+ // AMDGCN-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED addrspace(4)* %t_b_ned)
+ // AMDGCN-NEXT: call void @_ZN2VDD1Ev(%struct.VD addrspace(4)* %vd)
+ // AMDGCN-NEXT: call void @_ZN3NEDD1Ev(%struct.NED addrspace(4)* %ned)
+ // AMDGCN-NEXT: call void @_ZN2UDD1Ev(%struct.UD addrspace(4)* %ud)
+ // AMDGCN-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD addrspace(4)* %ecd)
+ // AMDGCN-NEXT: call void @_ZN2EDD1Ev(%struct.ED addrspace(4)* %ed)
// CHECK-NEXT: ret void
}
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -2,6 +2,9 @@
// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s -DNVPTX | FileCheck -check-prefixes=DEVICE,NVPTX %s
+
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn -emit-llvm \
// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
@@ -25,9 +28,11 @@
__host__ __device__ void bar() {
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
baz();
- // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
+ #ifdef NVPTX
+ // NVPTX: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
int x;
asm ("trap;" : "=l"(x));
+ #endif
// DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
asm volatile ("trap;");
}
Index: test/CodeGenCUDA/address-spaces.cu
===================================================================
--- test/CodeGenCUDA/address-spaces.cu
+++ test/CodeGenCUDA/address-spaces.cu
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck --check-prefixes=NVPTX,CHECK %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck --check-prefixes=AMDGCN,CHECK %s
// Verifies Clang emits correct address spaces and addrspacecast instructions
// for CUDA code.
@@ -8,7 +9,8 @@
// CHECK: @i = addrspace(1) externally_initialized global
__device__ int i;
-// CHECK: @j = addrspace(4) externally_initialized global
+// AMDGCN: @j = addrspace(2) externally_initialized global
+// NVPTX: @j = addrspace(4) externally_initialized global
__constant__ int j;
// CHECK: @k = addrspace(3) global
@@ -27,17 +29,21 @@
// CHECK: @b = addrspace(3) global float undef
__device__ void foo() {
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
+ // NVPTX: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)
+ // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @i to i32 addrspace(4)*)
i++;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*)
+ // NVPTX: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*)
+ // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @j to i32 addrspace(4)*)
j++;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*)
+ // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*)
+ // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @k to i32 addrspace(4)*)
k++;
__shared__ int lk;
- // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
+ // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*)
+ // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32 addrspace(4)*)
lk++;
}
@@ -47,8 +53,9 @@
ap->data1 = 1;
ap->data2 = 2;
}
-// CHECK: define void @_Z5func0v()
-// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+// CHECK-LABEL: define void @_Z5func0v()
+// NVPTX: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap
+// AMDGCN: store %struct.MyStruct addrspace(4)* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct addrspace(4)*), %struct.MyStruct addrspace(4)* addrspace(4)* %ap
__device__ void callee(float *ap) {
*ap = 1.0f;
@@ -58,37 +65,42 @@
__shared__ float a;
callee(&a); // implicit cast from parameters
}
-// CHECK: define void @_Z5func1v()
-// CHECK: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
+// CHECK-LABEL: define void @_Z5func1v()
+// NVPTX: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*))
+// AMDGCN: call void @_Z6calleePf(float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float addrspace(4)*))
__device__ void func2() {
__shared__ float a[256];
float *ap = &a[128]; // implicit cast from a decayed array
*ap = 1.0f;
}
-// CHECK: define void @_Z5func2v()
-// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap
-
+// CHECK-LABEL: define void @_Z5func2v()
+// NVPTX: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap
+// AMDGCN: store float addrspace(4)* getelementptr inbounds ([256 x float], [256 x float] addrspace(4)* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float] addrspace(4)*), i64 0, i64 128), float addrspace(4)* addrspace(4)* %ap
__device__ void func3() {
__shared__ float a;
float *ap = reinterpret_cast<float *>(&a); // explicit cast
*ap = 1.0f;
}
-// CHECK: define void @_Z5func3v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+// CHECK-LABEL: define void @_Z5func3v()
+// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap
+// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* %ap
__device__ void func4() {
__shared__ float a;
float *ap = (float *)&a; // explicit c-style cast
*ap = 1.0f;
}
-// CHECK: define void @_Z5func4v()
-// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+// CHECK-LABEL: define void @_Z5func4v()
+// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap
+// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* %ap
__shared__ float b;
__device__ float *func5() {
return &b; // implicit cast from a return value
}
-// CHECK: define float* @_Z5func5v()
-// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*)
+// NVPTX-LABEL: define float* @_Z5func5v()
+// AMDGCN-LABEL: define float addrspace(4)* @_Z5func5v()
+// NVPTX: ret float* addrspacecast (float addrspace(3)* @b to float*)
+// AMDGCN: ret float addrspace(4)* addrspacecast (float addrspace(3)* @b to float addrspace(4)*)
Index: lib/CodeGen/ItaniumCXXABI.cpp
===================================================================
--- lib/CodeGen/ItaniumCXXABI.cpp
+++ lib/CodeGen/ItaniumCXXABI.cpp
@@ -1108,7 +1108,7 @@
if (!Record->hasTrivialDestructor()) {
CXXDestructorDecl *DtorD = Record->getDestructor();
Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
- Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
+ Dtor = llvm::ConstantExpr::getPointerCast(Dtor, CGM.Int8PtrTy);
}
}
if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
@@ -1223,7 +1223,8 @@
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl());
llvm::Value *Value =
- CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
+ CGF.GetVTablePtr(ThisPtr, CGF.getTypes().getDefaultPointerTo(
+ StdTypeInfoPtrTy), ClassDecl);
// Load the type info.
Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
@@ -1992,7 +1993,8 @@
CGM.getDataLayout().getABITypeAlignment(guardTy));
}
}
- llvm::PointerType *guardPtrTy = guardTy->getPointerTo();
+ llvm::PointerType *guardPtrTy = guardTy->getPointerTo(
+ getContext().getTargetDefaultAddressSpace());
// Create the guard variable if we don't already have it (as we
// might if we're double-emitting this function body).
@@ -2010,7 +2012,10 @@
guard = new llvm::GlobalVariable(CGM.getModule(), guardTy,
false, var->getLinkage(),
llvm::ConstantInt::get(guardTy, 0),
- guardName.str());
+ guardName.str(),
+ /* InsertBefore */ nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ getContext().getTargetGlobalAddressSpace());
guard->setVisibility(var->getVisibility());
// If the variable is thread-local, so is its guard variable.
guard->setThreadLocalMode(var->getThreadLocalMode());
@@ -2171,8 +2176,8 @@
llvm::Value *args[] = {
llvm::ConstantExpr::getBitCast(dtor, dtorTy),
- llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy),
- handle
+ llvm::ConstantExpr::getPointerCast(addr, CGF.Int8PtrTy),
+ llvm::ConstantExpr::getPointerCast(handle, CGF.Int8PtrTy)
};
CGF.EmitNounwindRuntimeCall(atexit, args);
}
@@ -2584,7 +2589,7 @@
}
}
- return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+ return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy);
}
/// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type
@@ -2913,7 +2918,7 @@
llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
VTable =
llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two);
- VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
+ VTable = llvm::ConstantExpr::getPointerCast(VTable, CGM.Int8PtrTy);
Fields.push_back(VTable);
}
@@ -2986,7 +2991,7 @@
assert(!OldGV->hasAvailableExternallyLinkage() &&
"available_externally typeinfos not yet implemented");
- return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy);
+ return llvm::ConstantExpr::getPointerCast(OldGV, CGM.Int8PtrTy);
}
// Check if there is already an external RTTI descriptor for this type.
@@ -3022,7 +3027,7 @@
TypeNameField =
llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy);
} else {
- TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy);
+ TypeNameField = llvm::ConstantExpr::getPointerCast(TypeName, CGM.Int8PtrTy);
}
Fields.push_back(TypeNameField);
@@ -3177,7 +3182,7 @@
}
}
- return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+ return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy);
}
/// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info
Index: lib/CodeGen/CodeGenTypes.h
===================================================================
--- lib/CodeGen/CodeGenTypes.h
+++ lib/CodeGen/CodeGenTypes.h
@@ -182,6 +182,13 @@
/// ConvertType - Convert type T into a llvm::Type.
llvm::Type *ConvertType(QualType T);
+ /// Get a pointer type pointing to the given QualType \p T.
+ llvm::PointerType *getPointerTypeTo(QualType T = QualType());
+
+ /// Get a pointer type pointing to the given llvm::Type \p T in the default
+ /// target address space.
+ llvm::PointerType *getDefaultPointerTo(llvm::Type *T);
+
/// \brief Converts the GlobalDecl into an llvm::Type. This should be used
/// when we know the target of the function we want to convert. This is
/// because some functions (explicitly, those with pass_object_size
Index: lib/CodeGen/CodeGenTypes.cpp
===================================================================
--- lib/CodeGen/CodeGenTypes.cpp
+++ lib/CodeGen/CodeGenTypes.cpp
@@ -375,6 +375,14 @@
return ResultType;
}
+llvm::PointerType *CodeGenTypes::getPointerTypeTo(QualType T) {
+ return ConvertType(T)->getPointerTo(Context.getTargetAddressSpace(T));
+}
+
+llvm::PointerType *CodeGenTypes::getDefaultPointerTo(llvm::Type *T) {
+ return T->getPointerTo(Context.getTargetDefaultAddressSpace());
+}
+
/// ConvertType - Convert the specified type to its LLVM form.
llvm::Type *CodeGenTypes::ConvertType(QualType T) {
T = Context.getCanonicalType(T);
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -47,6 +47,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
@@ -90,6 +91,7 @@
VMContext(M.getContext()), Types(*this), VTables(*this),
SanitizerMD(new SanitizerMetadata(*this)) {
+ unsigned DefaultTargetAddressSpace = Target.getDefaultTargetAddressSpace(LangOpts);
// Initialize the type cache.
llvm::LLVMContext &LLVMContext = M.getContext();
VoidTy = llvm::Type::getVoidTy(LLVMContext);
@@ -99,18 +101,18 @@
Int64Ty = llvm::Type::getInt64Ty(LLVMContext);
FloatTy = llvm::Type::getFloatTy(LLVMContext);
DoubleTy = llvm::Type::getDoubleTy(LLVMContext);
- PointerWidthInBits = C.getTargetInfo().getPointerWidth(0);
+ PointerWidthInBits = C.getTargetInfo().getPointerWidth(DefaultTargetAddressSpace);
PointerAlignInBytes =
- C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity();
+ C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(DefaultTargetAddressSpace)).getQuantity();
SizeSizeInBytes =
C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity();
IntAlignInBytes =
C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity();
IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
IntPtrTy = llvm::IntegerType::get(LLVMContext,
C.getTargetInfo().getMaxPointerWidth());
- Int8PtrTy = Int8Ty->getPointerTo(0);
- Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
+ Int8PtrTy = Int8Ty->getPointerTo(DefaultTargetAddressSpace);
+ Int8PtrPtrTy = Int8PtrTy->getPointerTo(DefaultTargetAddressSpace);
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC();
@@ -750,7 +752,7 @@
ctor.addInt(Int32Ty, I.Priority);
ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy));
if (I.AssociatedData)
- ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy));
+ ctor.add(llvm::ConstantExpr::getPointerCast(I.AssociatedData, VoidPtrTy));
else
ctor.addNullPointer(VoidPtrTy);
ctor.finishAndAddTo(ctors);
@@ -1418,10 +1420,13 @@
*LineNoCst = EmitAnnotationLineNo(L);
// Create the ConstantStruct for the global annotation.
+ unsigned AS = GV->getType()->getAddressSpace();
+ llvm::PointerType *I8PTy = (AS == Int8PtrTy->getAddressSpace()) ?
+ Int8PtrTy : Int8Ty->getPointerTo(AS);
llvm::Constant *Fields[4] = {
- llvm::ConstantExpr::getBitCast(GV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy),
+ llvm::ConstantExpr::getPointerCast(GV, I8PTy),
+ llvm::ConstantExpr::getPointerCast(AnnoGV, I8PTy),
+ llvm::ConstantExpr::getPointerCast(UnitGV, I8PTy),
LineNoCst
};
return llvm::ConstantStruct::getAnon(Fields);
@@ -1548,7 +1553,7 @@
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee());
if (Entry) {
unsigned AS = getContext().getTargetAddressSpace(VD->getType());
- auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS));
+ auto Ptr = llvm::ConstantExpr::getPointerCast(Entry, DeclTy->getPointerTo(AS));
return ConstantAddress(Ptr, Alignment);
}
@@ -1900,7 +1905,7 @@
/// GetOrCreateLLVMFunction - If the specified mangled name is not in the
/// module, create and return an llvm Function with the specified type. If there
/// is something in the module with the specified name, return it potentially
-/// bitcasted to the right type.
+/// cast to the right type.
///
/// If D is non-null, it specifies a decl that correspond to this. This is used
/// to set the attributes on the function when it is first created.
@@ -1952,7 +1957,7 @@
// (If function is requested for a definition, we always need to create a new
// function, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo());
+ return llvm::ConstantExpr::getPointerCast(Entry, Ty->getPointerTo());
}
// This function doesn't have a complete type (for example, the return
@@ -2060,7 +2065,7 @@
}
llvm::Type *PTy = llvm::PointerType::getUnqual(Ty);
- return llvm::ConstantExpr::getBitCast(F, PTy);
+ return llvm::ConstantExpr::getPointerCast(F, PTy);
}
/// GetAddrOfFunction - Return the address of the given function. If Ty is
@@ -2189,7 +2194,7 @@
/// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module,
/// create and return an llvm GlobalVariable with the specified type. If there
/// is something in the module with the specified name, return it potentially
-/// bitcasted to the right type.
+/// cast to the right type.
///
/// If D is non-null, it specifies a decl that correspond to this. This is used
/// to set the attributes on the global when it is first created.
@@ -2237,14 +2242,10 @@
}
}
- // Make sure the result is of the correct type.
- if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace())
- return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty);
-
// (If global is requested for a definition, we always need to create a new
// global, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(Entry, Ty);
+ return llvm::ConstantExpr::getPointerCast(Entry, Ty);
}
unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
@@ -2260,7 +2261,7 @@
if (!Entry->use_empty()) {
llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, Entry->getType());
+ llvm::ConstantExpr::getPointerCast(GV, Entry->getType());
Entry->replaceAllUsesWith(NewPtrForOldDecl);
}
@@ -2372,7 +2373,7 @@
if (!OldGV->use_empty()) {
llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
+ llvm::ConstantExpr::getPointerCast(GV, OldGV->getType());
OldGV->replaceAllUsesWith(NewPtrForOldDecl);
}
@@ -2452,6 +2453,12 @@
AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared);
else
AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device);
+ } else if (getTriple().getArch() == llvm::Triple::amdgcn &&
+ (LangOpts.CPlusPlus || LangOpts.OpenMP)) {
+ if (D && D->getType().isConstant(getContext()))
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant);
+ else
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_global);
}
return AddrSpace;
@@ -2621,7 +2628,7 @@
// Replace all uses of the old global with the new global
llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, Entry->getType());
+ llvm::ConstantExpr::getPointerCast(GV, Entry->getType());
Entry->replaceAllUsesWith(NewPtrForOldDecl);
// Erase the old global, since it is no longer used.
@@ -3116,7 +3123,7 @@
// Remove it and replace uses of it with the alias.
GA->takeName(Entry);
- Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA,
+ Entry->replaceAllUsesWith(llvm::ConstantExpr::getPointerCast(GA,
Entry->getType()));
Entry->eraseFromParent();
} else {
@@ -3334,7 +3341,7 @@
if (isUTF16)
// Cast the UTF16 string to the correct type.
- Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy);
+ Str = llvm::ConstantExpr::getPointerCast(Str, Int8PtrTy);
Fields.add(Str);
// String length.
@@ -3442,7 +3449,7 @@
CodeGenModule &CGM, StringRef GlobalName,
CharUnits Alignment) {
// OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
- unsigned AddrSpace = 0;
+ unsigned AddrSpace = CGM.getContext().getTargetConstantAddressSpace();
if (CGM.getLangOpts().OpenCL)
AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
@@ -3760,6 +3767,9 @@
/// EmitTopLevelDecl - Emit code for a single top level declaration.
void CodeGenModule::EmitTopLevelDecl(Decl *D) {
+ if (getenv("DBG_CG_DECL")) {
+ llvm::errs() << "decl: "; D->dump();
+ }
// Ignore dependent declarations.
if (D->getDeclContext() && D->getDeclContext()->isDependentContext())
return;
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -377,7 +377,7 @@
};
/// i32s containing the indexes of the cleanup destinations.
- llvm::AllocaInst *NormalCleanupDest;
+ llvm::Instruction *NormalCleanupDest;
unsigned NextCleanupDestIndex;
@@ -392,8 +392,8 @@
llvm::Value *ExceptionSlot;
/// The selector slot. Under the MandatoryCleanup model, all landing pads
- /// write the current selector value into this alloca.
- llvm::AllocaInst *EHSelectorSlot;
+ /// write the current selector value into this instruction.
+ llvm::Instruction *EHSelectorSlot;
/// A stack of exception code slots. Entering an __except block pushes a slot
/// on the stack and leaving pops one. The __exception_code() intrinsic loads
@@ -428,11 +428,11 @@
/// An i1 variable indicating whether or not the @finally is
/// running for an exception.
- llvm::AllocaInst *ForEHVar;
+ llvm::Instruction *ForEHVar;
/// An i8* variable into which the exception pointer to rethrow
/// has been saved.
- llvm::AllocaInst *SavedExnVar;
+ llvm::Instruction *SavedExnVar;
public:
void enter(CodeGenFunction &CGF, const Stmt *Finally,
@@ -1858,14 +1858,23 @@
AlignmentSource *Source = nullptr);
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
+  /// Create an alloca instruction. If the default address space is not 0,
+  /// insert an addrspacecast instruction that casts the resulting alloca to
+  /// a pointer in the default address space.
+ llvm::Instruction *CreateAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
+ llvm::Instruction *InsertPos = nullptr);
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
/// block. The caller is responsible for setting an appropriate alignment on
- /// the alloca.
- llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name = "tmp");
+ /// the alloca. If the default address space is not 0, insert addrspacecast.
+ llvm::Instruction *CreateTempAlloca(llvm::Type *Ty,
+ const Twine &Name = "tmp");
Address CreateTempAlloca(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp");
+  /// Get the alloca instruction operand of an addrspacecast instruction.
+  /// If \p Inst is itself an alloca instruction, returns \p Inst.
+ llvm::AllocaInst *getAddrSpaceCastedAlloca(llvm::Instruction *Inst) const;
+
/// CreateDefaultAlignedTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
///
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -442,7 +442,7 @@
"callsite");
llvm::Value *args[] = {
- llvm::ConstantExpr::getBitCast(CurFn, PointerTy),
+ llvm::ConstantExpr::getPointerCast(CurFn, PointerTy),
CallSite
};
Index: lib/CodeGen/CGVTables.cpp
===================================================================
--- lib/CodeGen/CGVTables.cpp
+++ lib/CodeGen/CGVTables.cpp
@@ -550,7 +550,7 @@
return addOffsetConstant(component.getOffsetToTop());
case VTableComponent::CK_RTTI:
- return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
+ return builder.add(llvm::ConstantExpr::getPointerCast(rtti, CGM.Int8PtrTy));
case VTableComponent::CK_FunctionPointer:
case VTableComponent::CK_CompleteDtorPointer:
@@ -594,7 +594,7 @@
llvm::Constant *fn = CGM.CreateRuntimeFunction(fnTy, name);
if (auto f = dyn_cast<llvm::Function>(fn))
f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy);
+ return llvm::ConstantExpr::getPointerCast(fn, CGM.Int8PtrTy);
};
llvm::Constant *fnPtr;
@@ -628,7 +628,7 @@
fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true);
}
- fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy);
+ fnPtr = llvm::ConstantExpr::getPointerCast(fnPtr, CGM.Int8PtrTy);
builder.add(fnPtr);
return;
}
Index: lib/CodeGen/CGVTT.cpp
===================================================================
--- lib/CodeGen/CGVTT.cpp
+++ lib/CodeGen/CGVTT.cpp
@@ -84,7 +84,7 @@
VTable->getValueType(), VTable, Idxs, /*InBounds=*/true,
/*InRangeIndex=*/1);
- Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy);
+ Init = llvm::ConstantExpr::getPointerCast(Init, Int8PtrTy);
VTTComponents.push_back(Init);
}
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -924,7 +924,7 @@
DefaultOpenMPPSource =
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
DefaultOpenMPPSource =
- llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
+ llvm::ConstantExpr::getPointerCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
ConstantInitBuilder builder(CGM);
@@ -2918,7 +2918,7 @@
llvm::Module &M = CGM.getModule();
// Make sure the address has the right type.
- llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
+ llvm::Constant *AddrPtr = llvm::ConstantExpr::getPointerCast(ID, CGM.VoidPtrTy);
// Create constant string with the name.
llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
@@ -2928,7 +2928,7 @@
llvm::GlobalValue::InternalLinkage, StrPtrInit,
".omp_offloading.entry_name");
Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
+ llvm::Constant *StrPtr = llvm::ConstantExpr::getPointerCast(Str, CGM.Int8PtrTy);
// We can't have any padding between symbols, so we need to have 1-byte
// alignment.
@@ -4871,7 +4871,7 @@
// the device, because these functions will be entry points to the device.
if (CGM.getLangOpts().OpenMPIsDevice) {
- OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
+ OutlinedFnID = llvm::ConstantExpr::getPointerCast(OutlinedFn, CGM.Int8PtrTy);
OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
} else
OutlinedFnID = new llvm::GlobalVariable(
Index: lib/CodeGen/CGGPUBuiltin.cpp
===================================================================
--- lib/CodeGen/CGGPUBuiltin.cpp
+++ lib/CodeGen/CGGPUBuiltin.cpp
@@ -21,9 +21,9 @@
using namespace clang;
using namespace CodeGen;
-static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
- llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
- llvm::Type::getInt8PtrTy(M.getContext())};
+static llvm::Function *GetVprintfDeclaration(CodeGenModule &CGM) {
+ auto &M = CGM.getModule();
+ llvm::Type *ArgTypes[] = {CGM.Int8PtrTy, CGM.Int8PtrTy};
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
@@ -69,12 +69,13 @@
RValue
CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
- assert(getTarget().getTriple().isNVPTX());
+ assert(getTarget().getTriple().isNVPTX() ||
+ (getTarget().getTriple().getArch() == llvm::Triple::amdgcn &&
+ getLangOpts().CUDA));
assert(E->getBuiltinCallee() == Builtin::BIprintf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
const llvm::DataLayout &DL = CGM.getDataLayout();
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
CallArgList Args;
EmitCallArgs(Args,
@@ -93,7 +94,7 @@
llvm::Value *BufferPtr;
if (Args.size() <= 1) {
// If there are no args, pass a null pointer to vprintf.
- BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
+ BufferPtr = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
@@ -112,11 +113,11 @@
llvm::Value *Arg = Args[I].RV.getScalarVal();
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
}
- BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
+ BufferPtr = Builder.CreatePointerCast(Alloca, CGM.Int8PtrTy);
}
// Invoke vprintf and return.
- llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
+ llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM);
return RValue::get(
Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr}));
}
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -256,8 +256,15 @@
//===--------------------------------------------------------------------===//
Value *Visit(Expr *E) {
+ if (getenv("DBG_CG_SCALAR_EXPR")) {
+ llvm::errs() << "Expr: "; E->dump();
+ }
ApplyDebugLocation DL(CGF, E);
- return StmtVisitor<ScalarExprEmitter, Value*>::Visit(E);
+ auto Res = StmtVisitor<ScalarExprEmitter, Value*>::Visit(E);
+ if (getenv("DBG_CG_SCALAR_EXPR")) {
+ llvm::errs() << " => " << *Res << '\n';
+ }
+ return Res;
}
Value *VisitStmt(Stmt *S) {
Index: lib/CodeGen/CGExprConstant.cpp
===================================================================
--- lib/CodeGen/CGExprConstant.cpp
+++ lib/CodeGen/CGExprConstant.cpp
@@ -1316,7 +1316,7 @@
if (!Offset->isNullValue()) {
unsigned AS = C->getType()->getPointerAddressSpace();
llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS);
- llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy);
+ llvm::Constant *Casted = llvm::ConstantExpr::getPointerCast(C, CharPtrTy);
Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, Casted, Offset);
C = llvm::ConstantExpr::getPointerCast(Casted, C->getType());
}
Index: lib/CodeGen/CGExprCXX.cpp
===================================================================
--- lib/CodeGen/CGExprCXX.cpp
+++ lib/CodeGen/CGExprCXX.cpp
@@ -2024,8 +2024,7 @@
}
llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) {
- llvm::Type *StdTypeInfoPtrTy =
- ConvertType(E->getType())->getPointerTo();
+ llvm::Type *StdTypeInfoPtrTy = getTypes().getPointerTypeTo(E->getType());
if (E->isTypeOperand()) {
llvm::Constant *TypeInfo =
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -62,16 +62,37 @@
/// block.
Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
const Twine &Name) {
- auto Alloca = CreateTempAlloca(Ty, Name);
+ auto CastedAlloca = CreateTempAlloca(Ty, Name);
+ auto *Alloca = getAddrSpaceCastedAlloca(CastedAlloca);
Alloca->setAlignment(Align.getQuantity());
- return Address(Alloca, Align);
+ return Address(CastedAlloca, Align);
}
/// CreateTempAlloca - This creates a alloca and inserts it into the entry
/// block.
-llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
- const Twine &Name) {
- return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
+llvm::Instruction *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
+ const Twine &Name) {
+ return CreateAlloca(Ty, Name, AllocaInsertPt);
+}
+
+llvm::Instruction *CodeGenFunction::CreateAlloca(llvm::Type *Ty,
+ const Twine &Name,
+ llvm::Instruction *InsertPos) {
+ llvm::Instruction *V = new llvm::AllocaInst(Ty, nullptr, Name, InsertPos);
+ auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts());
+ if (DefaultAddr != 0) {
+ auto *DestTy = llvm::PointerType::get(V->getType()->getPointerElementType(),
+ DefaultAddr);
+ V = new llvm::AddrSpaceCastInst(V, DestTy, "", InsertPos);
+ }
+ return V;
+}
+
+llvm::AllocaInst *
+CodeGenFunction::getAddrSpaceCastedAlloca(llvm::Instruction *V) const {
+ if (auto *Cast = dyn_cast<llvm::AddrSpaceCastInst>(V))
+ return cast<llvm::AllocaInst>(Cast->getOperand(0));
+ return cast<llvm::AllocaInst>(V);
}
/// CreateDefaultAlignTempAlloca - This creates an alloca with the
@@ -416,8 +437,8 @@
// Create and initialize the reference temporary.
Address Object = createReferenceTemporary(*this, M, E);
if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) {
- Object = Address(llvm::ConstantExpr::getBitCast(
- Var, ConvertTypeForMem(E->getType())->getPointerTo()),
+ Object = Address(llvm::ConstantExpr::getPointerCast(
+ Var, getTypes().getPointerTypeTo(E->getType())),
Object.getAlignment());
// If the temporary is a global and has a constant initializer or is a
// constant temporary that we promoted to a global, we may have already
@@ -2887,7 +2908,9 @@
}
QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType();
- return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType));
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(Addr,
+ ConvertTypeForMem(EltType)->getPointerTo(getContext().
+ getTargetAddressSpace(E->getType())));
}
/// isSimpleArrayDecayOperand - If the specified expr is a simple decay from an
Index: lib/CodeGen/CGException.cpp
===================================================================
--- lib/CodeGen/CGException.cpp
+++ lib/CodeGen/CGException.cpp
@@ -237,7 +237,7 @@
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
llvm::Constant *Fn = getPersonalityFn(CGM, Personality);
- return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ return llvm::ConstantExpr::getPointerCast(Fn, CGM.Int8PtrTy);
}
/// Check whether a landingpad instruction only uses C++ features.
@@ -1520,7 +1520,7 @@
llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::localrecover);
llvm::Constant *ParentI8Fn =
- llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
+ llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy);
RecoverCall = Builder.CreateCall(
FrameRecoverFn, {ParentI8Fn, ParentFP,
llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
@@ -1585,7 +1585,7 @@
llvm::Function *RecoverFPIntrin =
CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp);
llvm::Constant *ParentI8Fn =
- llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
+ llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy);
ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
}
@@ -1812,7 +1812,7 @@
llvm::Function *FilterFunc =
HelperCGF.GenerateSEHFilterFunction(*this, *Except);
llvm::Constant *OpaqueFunc =
- llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy);
+ llvm::ConstantExpr::getPointerCast(FilterFunc, Int8PtrTy);
CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except.ret"));
}
Index: lib/CodeGen/CGDeclCXX.cpp
===================================================================
--- lib/CodeGen/CGDeclCXX.cpp
+++ lib/CodeGen/CGDeclCXX.cpp
@@ -103,8 +103,8 @@
CXXDestructorDecl *dtor = Record->getDestructor();
function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete);
- argument = llvm::ConstantExpr::getBitCast(
- addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo());
+ argument = llvm::ConstantExpr::getPointerCast(
+ addr.getPointer(), CGF.getTypes().getPointerTypeTo(type));
// Otherwise, the standard logic requires a helper function.
} else {
@@ -135,7 +135,7 @@
CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType());
uint64_t Width = WidthChars.getQuantity();
llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width),
- llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)};
+ llvm::ConstantExpr::getPointerCast(Addr, CGF.Int8PtrTy)};
CGF.Builder.CreateCall(InvariantStart, Args);
}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1075,7 +1075,15 @@
llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
vla->setAlignment(alignment.getQuantity());
- address = Address(vla, alignment);
+ llvm::Value *V = vla;
+ auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts());
+ if (DefaultAddr != 0) {
+ auto *DestTy =
+ llvm::PointerType::get(vla->getType()->getElementType(), DefaultAddr);
+ V = Builder.CreateAddrSpaceCast(vla, DestTy);
+ }
+
+ address = Address(V, alignment);
}
setAddrOfLocalVar(&D, address);
@@ -1244,7 +1252,7 @@
// Otherwise, create a temporary global with the initializer then
// memcpy from the global to the alloca.
std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = 0;
+ unsigned AS = CGM.getContext().getTargetConstantAddressSpace();
if (getLangOpts().OpenCL) {
AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
Index: lib/CodeGen/CGClass.cpp
===================================================================
--- lib/CodeGen/CGClass.cpp
+++ lib/CodeGen/CGClass.cpp
@@ -2372,12 +2372,16 @@
// Finally, store the address point. Use the same LLVM types as the field to
// support optimization.
+ auto DefAddr = CGM.getTarget().getDefaultTargetAddressSpace(
+ CGM.getLangOpts());
llvm::Type *VTablePtrTy =
llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true)
- ->getPointerTo()
- ->getPointerTo();
- VTableField = Builder.CreateBitCast(VTableField, VTablePtrTy->getPointerTo());
- VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy);
+ ->getPointerTo(DefAddr)
+ ->getPointerTo(DefAddr);
+ VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast(VTableField,
+ VTablePtrTy->getPointerTo(DefAddr));
+ VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ VTableAddressPoint, VTablePtrTy);
llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField);
CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAInfoForVTablePtr());
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3643,18 +3643,19 @@
if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct);
llvm::Instruction *IP = CallArgs.getStackBase();
- llvm::AllocaInst *AI;
+ llvm::Instruction *CastedAI;
if (IP) {
IP = IP->getNextNode();
- AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+ CastedAI = CreateAlloca(ArgStruct, "argmem", IP);
} else {
- AI = CreateTempAlloca(ArgStruct, "argmem");
+ CastedAI = CreateTempAlloca(ArgStruct, "argmem");
}
auto Align = CallInfo.getArgStructAlignment();
+ auto *AI = getAddrSpaceCastedAlloca(CastedAI);
AI->setAlignment(Align.getQuantity());
AI->setUsedWithInAlloca(true);
assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
- ArgMemory = Address(AI, Align);
+ ArgMemory = Address(CastedAI, Align);
}
// Helper function to drill into the inalloca allocation.
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2312,7 +2312,7 @@
case Builtin::BI__GetExceptionInfo: {
if (llvm::GlobalVariable *GV =
CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
- return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
+ return RValue::get(llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy));
break;
}
@@ -2674,7 +2674,9 @@
Arg));
}
case Builtin::BIprintf:
- if (getTarget().getTriple().isNVPTX())
+ if (getTarget().getTriple().isNVPTX() ||
+ (getTarget().getTriple().getArch() == Triple::amdgcn &&
+ getLangOpts().CUDA))
return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
break;
case Builtin::BI__builtin_canonicalize:
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1997,16 +1997,6 @@
return llvm::makeArrayRef(GCCRegNames);
}
-static const unsigned AMDGPUAddrSpaceMap[] = {
- 1, // opencl_global
- 3, // opencl_local
- 2, // opencl_constant
- 4, // opencl_generic
- 1, // cuda_device
- 2, // cuda_constant
- 3 // cuda_shared
-};
-
// If you edit the description strings, make sure you update
// getPointerWidthV().
@@ -2020,9 +2010,18 @@
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
class AMDGPUTargetInfo final : public TargetInfo {
+ static const unsigned AddrSpaceMap_[7];
static const Builtin::Info BuiltinInfo[];
static const char * const GCCRegNames[];
+ enum AddrSpaceKind {
+ AS_Private = 0,
+ AS_Global = 1,
+ AS_Constant = 2,
+ AS_Local = 3,
+ AS_Generic = 4
+ };
+
/// \brief The GPU profiles supported by the AMDGPU target.
enum GPUKind {
GK_NONE,
@@ -2066,7 +2065,7 @@
resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
DataLayoutStringSI : DataLayoutStringR600);
- AddrSpaceMap = &AMDGPUAddrSpaceMap;
+ AddrSpaceMap = &AddrSpaceMap_;
UseAddrSpaceMapMangling = true;
}
@@ -2254,6 +2253,23 @@
}
}
+ unsigned
+ getDefaultTargetAddressSpace(const LangOptions &Opts) const override {
+ // OpenCL sets address space explicitly in AST. The default case (type
+ // qualifier containing no address space) represents private address space.
+ if (Opts.OpenCL)
+ return AS_Private;
+ return AS_Generic;
+ }
+
+ unsigned getConstantAddressSpace() const override {
+ return AS_Constant;
+ }
+
+ unsigned getGlobalAddressSpace() const override {
+ return AS_Global;
+ }
+
LangAS::ID getOpenCLImageAddrSpace() const override {
return LangAS::opencl_constant;
}
@@ -2268,14 +2284,23 @@
}
}
- // In amdgcn target the null pointer in global, constant, and generic
- // address space has value 0 but in private and local address space has
- // value ~0.
+ // In the amdgcn target, the null pointer in the local and private address
+ // spaces has value ~0; in all other address spaces it has value 0.
uint64_t getNullPointerValue(unsigned AS) const override {
- return AS != LangAS::opencl_local && AS != 0 ? 0 : ~0;
+ return AS != AS_Local && AS != 0 ? 0 : ~0;
}
};
+const unsigned AMDGPUTargetInfo::AddrSpaceMap_[] = {
+ AS_Global, // opencl_global
+ AS_Local, // opencl_local
+ AS_Constant, // opencl_constant
+ AS_Generic, // opencl_generic
+ AS_Global, // cuda_device
+ AS_Constant, // cuda_constant
+ AS_Local // cuda_shared
+};
+
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{ #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
Index: lib/Basic/TargetInfo.cpp
===================================================================
--- lib/Basic/TargetInfo.cpp
+++ lib/Basic/TargetInfo.cpp
@@ -330,6 +330,13 @@
if (Opts.NewAlignOverride)
NewAlign = Opts.NewAlignOverride * getCharWidth();
+
+ if (getTriple().getArch() == llvm::Triple::amdgcn) {
+ auto DefAddr = getDefaultTargetAddressSpace(Opts);
+ // AMDGPUTargetInfo only implements getPointerWidthV and assumes
+ // pointers are self-aligned.
+ PointerWidth = PointerAlign = getPointerWidthV(DefAddr);
+ }
}
bool TargetInfo::initFeatureMap(
Index: lib/AST/ASTContext.cpp
===================================================================
--- lib/AST/ASTContext.cpp
+++ lib/AST/ASTContext.cpp
@@ -9531,13 +9531,40 @@
uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const {
unsigned AS;
if (QT->getUnqualifiedDesugaredType()->isNullPtrType())
- AS = 0;
+ AS = getTargetInfo().getDefaultTargetAddressSpace(LangOpts);
else
- AS = QT->getPointeeType().getAddressSpace();
+ AS = getTargetAddressSpace(QT->getPointeeType());
return getTargetInfo().getNullPointerValue(AS);
}
+unsigned ASTContext::getTargetDefaultAddressSpace() const {
+ return getTargetInfo().getDefaultTargetAddressSpace(LangOpts);
+}
+
+unsigned ASTContext::getTargetConstantAddressSpace() const {
+ return getTargetInfo().getConstantAddressSpace();
+}
+
+unsigned ASTContext::getTargetGlobalAddressSpace() const {
+ return getTargetInfo().getGlobalAddressSpace();
+}
+
+unsigned ASTContext::getTargetAddressSpace(QualType T) const {
+ if (T.isNull())
+ return getTargetDefaultAddressSpace();
+ if (T->isFunctionType() &&
+ !T.getQualifiers().hasAddressSpace())
+ return 0;
+ return getTargetAddressSpace(T.getQualifiers());
+}
+
+unsigned ASTContext::getTargetAddressSpace(Qualifiers Q) const {
+ return Q.hasAddressSpace()
+ ? getTargetAddressSpace(Q.getAddressSpace())
+ : getTargetDefaultAddressSpace();
+}
+
// Explicitly instantiate this in case a Redeclarable<T> is used from a TU that
// doesn't include ASTContext.h
template
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -302,11 +302,23 @@
}
/// \brief Get integer value for null pointer.
- /// \param AddrSpace address space of pointee in source language.
+ /// \param AddrSpace target address space of pointee.
virtual uint64_t getNullPointerValue(unsigned AddrSpace) const {
return 0;
}
+ /// The target address space corresponding to OpenCL constant address space
+ /// or CUDA constant specifier.
+ virtual unsigned getConstantAddressSpace() const {
+ return 0;
+ }
+
+ /// The target address space corresponding to OpenCL global address space
+ /// or CUDA device specifier.
+ virtual unsigned getGlobalAddressSpace() const {
+ return 0;
+ }
+
/// \brief Return the size of '_Bool' and C++ 'bool' for this target, in bits.
unsigned getBoolWidth() const { return BoolWidth; }
@@ -953,6 +965,10 @@
return *AddrSpaceMap;
}
+ virtual unsigned getDefaultTargetAddressSpace(const LangOptions &Opt) const {
+ return 0;
+ }
+
/// \brief Retrieve the name of the platform as it is used in the
/// availability attribute.
StringRef getPlatformName() const { return PlatformName; }
Index: include/clang/AST/ASTContext.h
===================================================================
--- include/clang/AST/ASTContext.h
+++ include/clang/AST/ASTContext.h
@@ -2300,13 +2300,9 @@
QualType getFloatingTypeOfSizeWithinDomain(QualType typeSize,
QualType typeDomain) const;
- unsigned getTargetAddressSpace(QualType T) const {
- return getTargetAddressSpace(T.getQualifiers());
- }
+ unsigned getTargetAddressSpace(QualType T) const;
- unsigned getTargetAddressSpace(Qualifiers Q) const {
- return getTargetAddressSpace(Q.getAddressSpace());
- }
+ unsigned getTargetAddressSpace(Qualifiers Q) const;
unsigned getTargetAddressSpace(unsigned AS) const {
if (AS < LangAS::Offset || AS >= LangAS::Offset + LangAS::Count)
@@ -2319,6 +2315,16 @@
/// constant folding.
uint64_t getTargetNullPointerValue(QualType QT) const;
+ unsigned getTargetDefaultAddressSpace() const;
+
+ /// The target address space corresponding to OpenCL constant address space
+ /// or CUDA constant specifier.
+ unsigned getTargetConstantAddressSpace() const;
+
+ /// The target address space corresponding to OpenCL global address space
+ /// or CUDA device specifier.
+ unsigned getTargetGlobalAddressSpace() const;
+
bool addressSpaceMapManglingFor(unsigned AS) const {
return AddrSpaceMapMangling ||
AS < LangAS::Offset ||
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits