Anastasia created this revision. Anastasia added a reviewer: yaxunl. Anastasia added a subscriber: cfe-commits.
Clang performs some optimizations/shortcuts for const-qualified aggregate variables when generating them on the stack: it bypasses the generation of the alloca instruction and replaces uses of the variable with direct accesses to the global variable that holds the initializer. In OpenCL we can only do this if the variable is qualified with the constant address space. https://reviews.llvm.org/D27109 Files: lib/CodeGen/CGDecl.cpp test/CodeGenOpenCL/constant-addr-space-globals.cl Index: test/CodeGenOpenCL/constant-addr-space-globals.cl =================================================================== --- test/CodeGenOpenCL/constant-addr-space-globals.cl +++ test/CodeGenOpenCL/constant-addr-space-globals.cl @@ -6,3 +6,22 @@ kernel void test(global float *out) { *out = array[0]; } + +// Test that we don't use directly initializers for const aggregates +// but create a copy in the original address space (unless a variable itself is +// in the constant address space). + +void foo(constant const int *p1, const int *p2, const int *p3); +// CHECK: @k.arr1 = internal addrspace(3) constant [3 x i32] [i32 1, i32 2, i32 3] +// CHECK: @k.arr2 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 4, i32 5, i32 6] +// CHECK: @k.arr3 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 7, i32 8, i32 9] +kernel void k(void) { + // CHECK-NOT: %arr1 = alloca [3 x i32] + constant const int arr1[] = {1, 2, 3}; + // CHECK: %arr2 = alloca [3 x i32] + const int arr2[] = {4, 5, 6}; + // CHECK: %arr3 = alloca [3 x i32] + int arr3[] = {7, 8, 9}; + + foo(arr1, arr2, arr3); +} Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -948,15 +948,18 @@ // If the variable's a const type, and it's neither an NRVO // candidate nor a __block variable and has no mutable members, // emit it as a global instead. 
- if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && - CGM.isTypeConstant(Ty, true)) { - EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); - - // Signal this condition to later callbacks. - emission.Addr = Address::invalid(); - assert(emission.wasEmittedAsGlobal()); - return emission; - } + // Exception is if a variable is located in non-constant address space + // in OpenCL. + if (!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) + if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && + CGM.isTypeConstant(Ty, true)) { + EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); + + // Signal this condition to later callbacks. + emission.Addr = Address::invalid(); + assert(emission.wasEmittedAsGlobal()); + return emission; + } // Otherwise, tell the initialization code that we're in this case. emission.IsConstantAggregate = true;
Index: test/CodeGenOpenCL/constant-addr-space-globals.cl =================================================================== --- test/CodeGenOpenCL/constant-addr-space-globals.cl +++ test/CodeGenOpenCL/constant-addr-space-globals.cl @@ -6,3 +6,22 @@ kernel void test(global float *out) { *out = array[0]; } + +// Test that we don't use directly initializers for const aggregates +// but create a copy in the original address space (unless a variable itself is +// in the constant address space). + +void foo(constant const int *p1, const int *p2, const int *p3); +// CHECK: @k.arr1 = internal addrspace(3) constant [3 x i32] [i32 1, i32 2, i32 3] +// CHECK: @k.arr2 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 4, i32 5, i32 6] +// CHECK: @k.arr3 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 7, i32 8, i32 9] +kernel void k(void) { + // CHECK-NOT: %arr1 = alloca [3 x i32] + constant const int arr1[] = {1, 2, 3}; + // CHECK: %arr2 = alloca [3 x i32] + const int arr2[] = {4, 5, 6}; + // CHECK: %arr3 = alloca [3 x i32] + int arr3[] = {7, 8, 9}; + + foo(arr1, arr2, arr3); +} Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -948,15 +948,18 @@ // If the variable's a const type, and it's neither an NRVO // candidate nor a __block variable and has no mutable members, // emit it as a global instead. - if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && - CGM.isTypeConstant(Ty, true)) { - EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); - - // Signal this condition to later callbacks. - emission.Addr = Address::invalid(); - assert(emission.wasEmittedAsGlobal()); - return emission; - } + // Exception is if a variable is located in non-constant address space + // in OpenCL. 
+ if (!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) + if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && + CGM.isTypeConstant(Ty, true)) { + EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); + + // Signal this condition to later callbacks. + emission.Addr = Address::invalid(); + assert(emission.wasEmittedAsGlobal()); + return emission; + } // Otherwise, tell the initialization code that we're in this case. emission.IsConstantAggregate = true;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits