Anastasia created this revision.
Anastasia added a reviewer: yaxunl.
Anastasia added a subscriber: cfe-commits.

Clang applies an optimization to const-qualified aggregate variables that
would otherwise be generated on the stack: it bypasses the generation of the
alloca instruction and replaces uses of the variable with direct accesses to
the global variable that holds the initializer.

In OpenCL, this optimization is only valid if the variable is qualified with
the constant address space; otherwise a copy has to be created in the
variable's original address space.


https://reviews.llvm.org/D27109

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenOpenCL/constant-addr-space-globals.cl


Index: test/CodeGenOpenCL/constant-addr-space-globals.cl
===================================================================
--- test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -6,3 +6,22 @@
 kernel void test(global float *out) {
   *out = array[0];
 }
+
+// Test that we don't use directly initializers for const aggregates
+// but create a copy in the original address space (unless a variable itself is
+// in the constant address space).
+
+void foo(constant const int *p1, const int *p2, const int *p3);
+// CHECK: @k.arr1 = internal addrspace(3) constant [3 x i32] [i32 1, i32 2, i32 3]
+// CHECK: @k.arr2 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 4, i32 5, i32 6]
+// CHECK: @k.arr3 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 7, i32 8, i32 9]
+kernel void k(void) {
+  // CHECK-NOT: %arr1 = alloca [3 x i32]
+  constant const int arr1[] = {1, 2, 3};
+  // CHECK: %arr2 = alloca [3 x i32]
+  const int arr2[] = {4, 5, 6};
+  // CHECK: %arr3 = alloca [3 x i32]
+  int arr3[] = {7, 8, 9};
+
+  foo(arr1, arr2, arr3);
+}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -948,15 +948,18 @@
       // If the variable's a const type, and it's neither an NRVO
       // candidate nor a __block variable and has no mutable members,
       // emit it as a global instead.
-      if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
-          CGM.isTypeConstant(Ty, true)) {
-        EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
-
-        // Signal this condition to later callbacks.
-        emission.Addr = Address::invalid();
-        assert(emission.wasEmittedAsGlobal());
-        return emission;
-      }
+      // Exception is if a variable is located in non-constant address space
+      // in OpenCL.
+      if (!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant)
+        if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
+            CGM.isTypeConstant(Ty, true)) {
+          EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
+
+          // Signal this condition to later callbacks.
+          emission.Addr = Address::invalid();
+          assert(emission.wasEmittedAsGlobal());
+          return emission;
+        }
 
       // Otherwise, tell the initialization code that we're in this case.
       emission.IsConstantAggregate = true;


Index: test/CodeGenOpenCL/constant-addr-space-globals.cl
===================================================================
--- test/CodeGenOpenCL/constant-addr-space-globals.cl
+++ test/CodeGenOpenCL/constant-addr-space-globals.cl
@@ -6,3 +6,22 @@
 kernel void test(global float *out) {
   *out = array[0];
 }
+
+// Test that we don't use directly initializers for const aggregates
+// but create a copy in the original address space (unless a variable itself is
+// in the constant address space).
+
+void foo(constant const int *p1, const int *p2, const int *p3);
+// CHECK: @k.arr1 = internal addrspace(3) constant [3 x i32] [i32 1, i32 2, i32 3]
+// CHECK: @k.arr2 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 4, i32 5, i32 6]
+// CHECK: @k.arr3 = private unnamed_addr addrspace(3) constant [3 x i32] [i32 7, i32 8, i32 9]
+kernel void k(void) {
+  // CHECK-NOT: %arr1 = alloca [3 x i32]
+  constant const int arr1[] = {1, 2, 3};
+  // CHECK: %arr2 = alloca [3 x i32]
+  const int arr2[] = {4, 5, 6};
+  // CHECK: %arr3 = alloca [3 x i32]
+  int arr3[] = {7, 8, 9};
+
+  foo(arr1, arr2, arr3);
+}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -948,15 +948,18 @@
       // If the variable's a const type, and it's neither an NRVO
       // candidate nor a __block variable and has no mutable members,
       // emit it as a global instead.
-      if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
-          CGM.isTypeConstant(Ty, true)) {
-        EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
-
-        // Signal this condition to later callbacks.
-        emission.Addr = Address::invalid();
-        assert(emission.wasEmittedAsGlobal());
-        return emission;
-      }
+      // Exception is if a variable is located in non-constant address space
+      // in OpenCL.
+      if (!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant)
+        if (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
+            CGM.isTypeConstant(Ty, true)) {
+          EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
+
+          // Signal this condition to later callbacks.
+          emission.Addr = Address::invalid();
+          assert(emission.wasEmittedAsGlobal());
+          return emission;
+        }
 
       // Otherwise, tell the initialization code that we're in this case.
       emission.IsConstantAggregate = true;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to