r339109 - AMDGPU: Add builtin for s_dcache_inv_vol

2018-08-07 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Aug  7 00:49:04 2018
New Revision: 339109

URL: http://llvm.org/viewvc/llvm-project?rev=339109&view=rev
Log:
AMDGPU: Add builtin for s_dcache_inv_vol

Added:
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339109&r1=339108&r2=339109&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Aug  7 00:49:04 2018
@@ -101,6 +101,11 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3
 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
 
 
//===--===//
+// CI+ only builtins.
+//===--===//
+TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts")
+
+//===--===//
 // VI+ only builtins.
 
//===--===//
 

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339109&r1=339108&r2=339109&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug  7 00:49:04 2018
@@ -148,12 +148,14 @@ bool AMDGPUTargetInfo::initFeatureMap(
   Features["16-bit-insts"] = true;
   Features["dpp"] = true;
   Features["s-memrealtime"] = true;
-  break;
+  LLVM_FALLTHROUGH;
 case GK_GFX704:
 case GK_GFX703:
 case GK_GFX702:
 case GK_GFX701:
 case GK_GFX700:
+  Features["ci-insts"] = true;
+  LLVM_FALLTHROUGH;
 case GK_GFX601:
 case GK_GFX600:
   break;

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=339109&r1=339108&r2=339109&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Tue Aug  7 00:49:04 2018
@@ -5,8 +5,16 @@
 
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX904 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX906 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX801 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX700 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX600 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX601 %s
 
-// GFX904: 
"target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
-// GFX906: 
"target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+// GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+// GFX906: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+// GFX801: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime"
+// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
+// GFX600: "target-features"="+fp32-denormals,+fp64-fp16-denormals"
+// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
 
 kernel void test() {}

Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=339109&view=auto
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Tue Aug  7 00:49:04 2018
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S 
-emit-llvm -o - %s | FileCheck %s
+
+//

r339110 - AMDGPU: Add builtin for s_dcache_wb

2018-08-07 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Aug  7 00:49:13 2018
New Revision: 339110

URL: http://llvm.org/viewvc/llvm-project?rev=339110&view=rev
Log:
AMDGPU: Add builtin for s_dcache_wb

Added:
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339110&r1=339109&r2=339110&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Aug  7 00:49:13 2018
@@ -121,6 +121,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fracth,
 TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
+TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts")
 
 
//===--===//
 // GFX9+ only builtins.

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339110&r1=339109&r2=339110&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug  7 00:49:13 2018
@@ -145,6 +145,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
 case GK_GFX803:
 case GK_GFX802:
 case GK_GFX801:
+  Features["vi-insts"] = true;
   Features["16-bit-insts"] = true;
   Features["dpp"] = true;
   Features["s-memrealtime"] = true;

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=339110&r1=339109&r2=339110&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Tue Aug  7 00:49:13 2018
@@ -10,9 +10,9 @@
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX600 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | 
FileCheck --check-prefix=GFX601 %s
 
-// GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
-// GFX906: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
-// GFX801: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime"
+// GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
+// GFX906: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts"
+// GFX801: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts"
 // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
 // GFX600: "target-features"="+fp32-denormals,+fp64-fp16-denormals"
 // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl?rev=339110&r1=339109&r2=339110&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Tue Aug  7 00:49:13 2018
@@ -82,6 +82,13 @@ void test_s_memrealtime(global ulong* ou
   *out = __builtin_amdgcn_s_memrealtime();
 }
 
+// CHECK-LABEL: @test_s_dcache_wb()
+// CHECK: call void @llvm.amdgcn.s.dcache.wb()
+void test_s_dcache_wb()
+{
+  __builtin_amdgcn_s_dcache_wb();
+}
+
 // CHECK-LABEL: @test_mov_dpp
 // CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 
false)
 void test_mov_dpp(global int* out, int src)

Added: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl?rev=339110&view=auto
==
--- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl (added)
+++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl Tue Aug  7 00:49:13 
2018
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s

r339278 - AMDGPU: Fix enabling denormals by default on pre-VI targets

2018-08-08 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Wed Aug  8 10:48:37 2018
New Revision: 339278

URL: http://llvm.org/viewvc/llvm-project?rev=339278&view=rev
Log:
AMDGPU: Fix enabling denormals by default on pre-VI targets

Fast FMAF is not a sufficient condition to enable denormals.
Before VI, enabling denormals caused F32 instructions to
run at F64 speeds.

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339278&r1=339277&r2=339278&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Wed Aug  8 10:48:37 2018
@@ -210,7 +210,8 @@ void AMDGPUTargetInfo::adjustTargetOptio
   }
   if (!hasFP32Denormals)
 TargetOpts.Features.push_back(
-(Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm
+(Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms &&
+   !CGOpts.FlushDenorm
? '+'
: '-') +
  Twine("fp32-denormals"))

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=339278&r1=339277&r2=339278&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Wed Aug  8 10:48:37 2018
@@ -94,77 +94,78 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 bool HasLDEXPF;
 bool HasFP64;
 bool HasFastFMA;
+bool HasFullRateF32Denorms;
   };
 
   static constexpr GPUInfo InvalidGPU =
-{{""}, {""}, GK_NONE, false, false, false, false, false};
+{{""}, {""}, GK_NONE, false, false, false, false, false, false};
   static constexpr GPUInfo R600GPUs[26] = {
-  // Name CanonicalKindHasHasHasHasHas
-  //  Name FMAF   Fast   LDEXPF FP64   Fast
-  //  FMAF FMA
-{{"r600"},{"r600"},GK_R600,false, false, false, false, false},
-{{"rv630"},   {"r600"},GK_R600,false, false, false, false, false},
-{{"rv635"},   {"r600"},GK_R600,false, false, false, false, false},
-{{"r630"},{"r630"},GK_R630,false, false, false, false, false},
-{{"rs780"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
-{{"rs880"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
-{{"rv610"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
-{{"rv620"},   {"rs880"},   GK_RS880,   false, false, false, false, false},
-{{"rv670"},   {"rv670"},   GK_RV670,   false, false, false, false, false},
-{{"rv710"},   {"rv710"},   GK_RV710,   false, false, false, false, false},
-{{"rv730"},   {"rv730"},   GK_RV730,   false, false, false, false, false},
-{{"rv740"},   {"rv770"},   GK_RV770,   false, false, false, false, false},
-{{"rv770"},   {"rv770"},   GK_RV770,   false, false, false, false, false},
-{{"cedar"},   {"cedar"},   GK_CEDAR,   false, false, false, false, false},
-{{"palm"},{"cedar"},   GK_CEDAR,   false, false, false, false, false},
-{{"cypress"}, {"cypress"}, GK_CYPRESS, true,  false, false, false, false},
-{{"hemlock"}, {"cypress"}, GK_CYPRESS, true,  false, false, false, false},
-{{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false},
-{{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false},
-{{"sumo"},{"sumo"},GK_SUMO,false, false, false, false, false},
-{{"sumo2"},   {"sumo"},GK_SUMO,false, false, false, false, false},
-{{"barts"},   {"barts"},   GK_BARTS,   false, false, false, false, false},
-{{"caicos"},  {"caicos"},  GK_BARTS,   false, false, false, false, false},
-{{"aruba"},   {"cayman"},  GK_CAYMAN,  true,  false, false, false, false},
-{{"cayman"},  {"cayman"},  GK_CAYMAN,  true,  false, false, false, false},
-{{"turks"},   {"turks"},   GK_TURKS,   false, false, false, false, false},
+  // Name CanonicalKindHasHasHasHasHas
Has
+  //  Name FMAF   Fast   LDEXPF FP64   Fast   
Fast
+  //  FMAF FMA
Denorm
+{{"r600"},{"r600"},GK_R600,false, false, false, false, false, 
false},
+{{"rv630"},   {"r600"},GK_R600,false, false, false, false, false, 
false},
+{{"rv635"},   {"r600"},GK_R600,false, false, false, false, false, 
false},
+{{"r630"},{"r630"},GK_R630,false, false, false, false, false, 
false},
+{{"rs780"},   {"rs880"},   GK_RS8

r339395 - AMDGPU: Add another missing builtin

2018-08-09 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Aug  9 15:18:37 2018
New Revision: 339395

URL: http://llvm.org/viewvc/llvm-project?rev=339395&view=rev
Log:
AMDGPU: Add another missing builtin

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339395&r1=339394&r2=339395&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug  9 15:18:37 2018
@@ -104,6 +104,7 @@ BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3
 // CI+ only builtins.
 
//===--===//
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts")
+TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts")
 
 
//===--===//
 // VI+ only builtins.

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=339395&r1=339394&r2=339395&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Thu Aug  9 15:18:37 2018
@@ -10,3 +10,10 @@ void test_s_dcache_inv_vol()
   __builtin_amdgcn_s_dcache_inv_vol();
 }
 
+// CHECK-LABEL: @test_buffer_wbinvl1_vol
+// CHECK: call void @llvm.amdgcn.buffer.wbinvl1.vol()
+void test_buffer_wbinvl1_vol()
+{
+  __builtin_amdgcn_buffer_wbinvl1_vol();
+}
+

Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl?rev=339395&r1=339394&r2=339395&view=diff
==
--- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl (original)
+++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Thu Aug  9 15:18:37 
2018
@@ -1,7 +1,8 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
 
-void test_ci_s_dcache_inv_vol()
+void test_ci_biltins()
 {
   __builtin_amdgcn_s_dcache_inv_vol(); // expected-error 
{{'__builtin_amdgcn_s_dcache_inv_vol' needs target feature ci-insts}}
+  __builtin_amdgcn_buffer_wbinvl1_vol(); // expected-error 
{{'__builtin_amdgcn_buffer_wbinvl1_vol' needs target feature ci-insts}}
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r339934 - AMDGPU: Correct errors in device table

2018-08-16 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Aug 16 13:19:47 2018
New Revision: 339934

URL: http://llvm.org/viewvc/llvm-project?rev=339934&view=rev
Log:
AMDGPU: Correct errors in device table

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.h

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=339934&r1=339933&r2=339934&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Thu Aug 16 13:19:47 2018
@@ -125,7 +125,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 {{"sumo"},{"sumo"},GK_SUMO,false, false, false, false, false, 
false},
 {{"sumo2"},   {"sumo"},GK_SUMO,false, false, false, false, false, 
false},
 {{"barts"},   {"barts"},   GK_BARTS,   false, false, false, false, false, 
false},
-{{"caicos"},  {"caicos"},  GK_BARTS,   false, false, false, false, false, 
false},
+{{"caicos"},  {"caicos"},  GK_CAICOS,  false, false, false, false, false, 
false},
 {{"aruba"},   {"cayman"},  GK_CAYMAN,  true,  false, false, false, false, 
false},
 {{"cayman"},  {"cayman"},  GK_CAYMAN,  true,  false, false, false, false, 
false},
 {{"turks"},   {"turks"},   GK_TURKS,   false, false, false, false, false, 
false},
@@ -163,7 +163,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 {{"gfx810"},{"gfx810"},  GK_GFX810,  true, false, true,  true, true, 
true},
 {{"stoney"},{"gfx810"},  GK_GFX810,  true, false, true,  true, true, 
true},
 {{"gfx900"},{"gfx900"},  GK_GFX900,  true, true,  true,  true, true, 
true},
-{{"gfx902"},{"gfx902"},  GK_GFX900,  true, true,  true,  true, true, 
true},
+{{"gfx902"},{"gfx902"},  GK_GFX902,  true, true,  true,  true, true, 
true},
 {{"gfx904"},{"gfx904"},  GK_GFX904,  true, true,  true,  true, true, 
true},
 {{"gfx906"},{"gfx906"},  GK_GFX906,  true, true,  true,  true, true, 
true},
   };


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r340193 - Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode

2018-08-20 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Aug 20 11:16:48 2018
New Revision: 340193

URL: http://llvm.org/viewvc/llvm-project?rev=340193&view=rev
Log:
Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode

The same semantics work for OpenCL, and probably any offload
language. Keep the old name around as an alias.

Modified:
cfe/trunk/include/clang/Driver/CC1Options.td
cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu
cfe/trunk/test/CodeGenCUDA/propagate-metadata.cu
cfe/trunk/test/Driver/cuda-detect.cu
cfe/trunk/test/Driver/openmp-offload-gpu.c

Modified: cfe/trunk/include/clang/Driver/CC1Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/CC1Options.td?rev=340193&r1=340192&r2=340193&view=diff
==
--- cfe/trunk/include/clang/Driver/CC1Options.td (original)
+++ cfe/trunk/include/clang/Driver/CC1Options.td Mon Aug 20 11:16:48 2018
@@ -287,9 +287,11 @@ def mconstructor_aliases : Flag<["-"], "
   HelpText<"Emit complete constructors and destructors as aliases when 
possible">;
 def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">,
   HelpText<"Link the given bitcode file before performing optimizations.">;
-def mlink_cuda_bitcode : Separate<["-"], "mlink-cuda-bitcode">,
+def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
   HelpText<"Link and internalize needed symbols from the given bitcode file "
"before performing optimizations.">;
+def mlink_cuda_bitcode : Separate<["-"], "mlink-cuda-bitcode">,
+  Alias<mlink_builtin_bitcode>;
 def vectorize_loops : Flag<["-"], "vectorize-loops">,
   HelpText<"Run the Loop vectorization passes">;
 def vectorize_slp : Flag<["-"], "vectorize-slp">,

Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=340193&r1=340192&r2=340193&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Mon Aug 20 11:16:48 2018
@@ -621,7 +621,7 @@ void CudaToolChain::addClangTargetOption
 return;
   }
 
-  CC1Args.push_back("-mlink-cuda-bitcode");
+  CC1Args.push_back("-mlink-builtin-bitcode");
   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
 
   // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
@@ -667,7 +667,7 @@ void CudaToolChain::addClangTargetOption
   SmallString<128> LibOmpTargetFile(LibraryPath);
   llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
   if (llvm::sys::fs::exists(LibOmpTargetFile)) {
-CC1Args.push_back("-mlink-cuda-bitcode");
+CC1Args.push_back("-mlink-builtin-bitcode");
 CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
 FoundBCLibrary = true;
 break;

Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=340193&r1=340192&r2=340193&view=diff
==
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Mon Aug 20 11:16:48 2018
@@ -912,10 +912,10 @@ static bool ParseCodeGenArgs(CodeGenOpti
   Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations);
   Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
   for (auto *A :
-   Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) {
+   Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) {
 CodeGenOptions::BitcodeFileToLink F;
 F.Filename = A->getValue();
-if (A->getOption().matches(OPT_mlink_cuda_bitcode)) {
+if (A->getOption().matches(OPT_mlink_builtin_bitcode)) {
   F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded;
   // When linking CUDA bitcode, propagate function attributes so that
   // e.g. libdevice gets fast-math attrs if we're building with fast-math.

Modified: cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu?rev=340193&r1=340192&r2=340193&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu Mon Aug 20 11:16:48 2018
@@ -11,13 +11,19 @@
 //
 // Make sure function in device-code gets linked in and internalized.
 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
+// RUN:-mlink-builtin-bitcode %t.bc  -emit-llvm \
+// RUN:-disable-llvm-passes -o - %s \
+// RUN:| FileCheck %s -check-prefix CHECK-IR
+
+// Make sure legacy flag name works
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-d

r340292 - AMDGPU: Move target code into TargetParser

2018-08-21 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Aug 21 09:13:29 2018
New Revision: 340292

URL: http://llvm.org/viewvc/llvm-project?rev=340292&view=rev
Log:
AMDGPU: Move target code into TargetParser

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=340292&r1=340291&r2=340292&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug 21 09:13:29 2018
@@ -127,12 +127,14 @@ bool AMDGPUTargetInfo::initFeatureMap(
 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
 const std::vector<std::string> &FeatureVec) const {
 
+  using namespace llvm::AMDGPU;
+
   // XXX - What does the member GPU mean if device name string passed here?
   if (isAMDGCN(getTriple())) {
 if (CPU.empty())
   CPU = "gfx600";
 
-switch (parseAMDGCNName(CPU).Kind) {
+switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
 case GK_GFX906:
   Features["dl-insts"] = true;
   LLVM_FALLTHROUGH;
@@ -169,7 +171,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
 if (CPU.empty())
   CPU = "r600";
 
-switch (parseR600Name(CPU).Kind) {
+switch (llvm::AMDGPU::parseArchR600(CPU)) {
 case GK_CAYMAN:
 case GK_CYPRESS:
 case GK_RV770:
@@ -201,7 +203,7 @@ void AMDGPUTargetInfo::adjustTargetOptio
TargetOptions &TargetOpts) const {
   bool hasFP32Denormals = false;
   bool hasFP64Denormals = false;
-  GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU);
+
   for (auto &I : TargetOpts.FeaturesAsWritten) {
 if (I == "+fp32-denormals" || I == "-fp32-denormals")
   hasFP32Denormals = true;
@@ -210,54 +212,20 @@ void AMDGPUTargetInfo::adjustTargetOptio
   }
   if (!hasFP32Denormals)
 TargetOpts.Features.push_back(
-(Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms &&
-   !CGOpts.FlushDenorm
-   ? '+'
-   : '-') +
- Twine("fp32-denormals"))
+  (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
+ ? '+' : '-') + Twine("fp32-denormals"))
 .str());
   // Always do not flush fp64 or fp16 denorms.
-  if (!hasFP64Denormals && CGOptsGPU.HasFP64)
+  if (!hasFP64Denormals && hasFP64())
 TargetOpts.Features.push_back("+fp64-fp16-denormals");
 }
 
-constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU;
-constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[];
-constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[];
-
-AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) {
-  const auto *Result = llvm::find_if(
-  R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
-
-  if (Result == std::end(R600GPUs))
-return InvalidGPU;
-  return *Result;
-}
-
-AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) {
-  const auto *Result = llvm::find_if(
-  AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; });
-
-  if (Result == std::end(AMDGCNGPUs))
-return InvalidGPU;
-  return *Result;
-}
-
-AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const 
{
-  if (isAMDGCN(getTriple()))
-return parseAMDGCNName(Name);
-  else
-return parseR600Name(Name);
-}
-
 void AMDGPUTargetInfo::fillValidCPUList(
 SmallVectorImpl<StringRef> &Values) const {
   if (isAMDGCN(getTriple()))
-llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) {
-   Values.emplace_back(GPU.Name);});
+llvm::AMDGPU::fillValidArchListAMDGCN(Values);
   else
-llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) {
-   Values.emplace_back(GPU.Name);});
+llvm::AMDGPU::fillValidArchListR600(Values);
 }
 
 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
@@ -267,7 +235,12 @@ void AMDGPUTargetInfo::setAddressSpaceMa
 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
 : TargetInfo(Triple),
-  GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
+  GPUKind(isAMDGCN(Triple) ?
+  llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
+  llvm::AMDGPU::parseArchR600(Opts.CPU)),
+  GPUFeatures(isAMDGCN(Triple) ?
+  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
+  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
 : DataLayoutStringR600);
   assert(DataLayout->getAllocaAddrSpace() == Private);
@@ -312,19 +285,22 @@ void AMDGPUTargetInfo::getTargetDefines(
   else
 Builder.defineMacro("__R600__");
 
-  if (GPU.Kind != GK_NONE)
-Builder.defineMacro(Twine("__") + Twine(GPU.C

r315094 - OpenCL: Assume functions are convergent

2017-10-06 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Fri Oct  6 12:34:40 2017
New Revision: 315094

URL: http://llvm.org/viewvc/llvm-project?rev=315094&view=rev
Log:
OpenCL: Assume functions are convergent

This was done for CUDA functions in r261779, and for the same
reason this also needs to be done for OpenCL. An arbitrary
function could have a barrier() call in it, which in turn
requires the calling function to be convergent.

Modified:
cfe/trunk/include/clang/Basic/LangOptions.h
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
cfe/trunk/test/CodeGenOpenCL/convergent.cl

Modified: cfe/trunk/include/clang/Basic/LangOptions.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.h?rev=315094&r1=315093&r2=315094&view=diff
==
--- cfe/trunk/include/clang/Basic/LangOptions.h (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.h Fri Oct  6 12:34:40 2017
@@ -197,6 +197,10 @@ public:
   bool allowsNonTrivialObjCLifetimeQualifiers() const {
 return ObjCAutoRefCount || ObjCWeak;
   }
+
+  bool assumeFunctionsAreConvergent() const {
+return (CUDA && CUDAIsDevice) || OpenCL;
+  }
 };
 
 /// \brief Floating point control options

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=315094&r1=315093&r2=315094&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Oct  6 12:34:40 2017
@@ -1750,13 +1750,16 @@ void CodeGenModule::ConstructDefaultFnAt
   FuncAttrs.addAttribute("backchain");
   }
 
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-// Conservatively, mark all functions and calls in CUDA as convergent
-// (meaning, they may call an intrinsically convergent op, such as
-// __syncthreads(), and so can't have certain optimizations applied around
-// them).  LLVM will remove this attribute where it safely can.
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+// Conservatively, mark all functions and calls in CUDA and OpenCL as
+// convergent (meaning, they may call an intrinsically convergent op, such
+// as __syncthreads() / barrier(), and so can't have certain optimizations
+// applied around them).  LLVM will remove this attribute where it safely
+// can.
 FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+  }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
 // Exceptions aren't supported in CUDA device code.
 FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
 

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=315094&r1=315093&r2=315094&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Fri Oct  6 12:34:40 2017
@@ -151,28 +151,28 @@ kernel void reqd_work_group_size_32_2_1_
 // CHECK-NOT: "amdgpu-num-sgpr"="0"
 // CHECK-NOT: "amdgpu-num-vgpr"="0"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind 
optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind 
optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind 
optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone 
"amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone 
"amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone 
"amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone 
"amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent 
noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind 
optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind 
optnone "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind 
optnone "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind 
optnone "amdgpu-num-vgpr"="64"
 
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { 
noinline nounwind optnone "amdgpu-fla

r315219 - AMDGPU: Fix missing declaration for __builtin_amdgcn_dispatch_ptr

2017-10-09 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Oct  9 10:44:18 2017
New Revision: 315219

URL: http://llvm.org/viewvc/llvm-project?rev=315219&view=rev
Log:
AMDGPU: Fix missing declaration for __builtin_amdgcn_dispatch_ptr

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=315219&r1=315218&r2=315219&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Oct  9 10:44:18 2017
@@ -21,6 +21,7 @@
 // SI+ only builtins.
 
//===--===//
 
+BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*2", "nc")
 BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc")
 BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc")
 

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=315219&r1=315218&r2=315219&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Oct  9 10:44:18 2017
@@ -421,6 +421,13 @@ void test_read_exec(global ulong* out) {
 
 // CHECK: declare i64 @llvm.read_register.i64(metadata) 
#[[NOUNWIND_READONLY:[0-9]+]]
 
+// CHECK-LABEL: @test_dispatch_ptr
+// CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out)
+{
+  *out = __builtin_amdgcn_dispatch_ptr();
+}
+
 // CHECK-LABEL: @test_kernarg_segment_ptr
 // CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
 void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char 
** out)


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r315238 - AMDGPU: Add read_exec_lo/hi builtins

2017-10-09 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Oct  9 13:06:37 2017
New Revision: 315238

URL: http://llvm.org/viewvc/llvm-project?rev=315238&view=rev
Log:
AMDGPU: Add read_exec_lo/hi builtins

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=315238&r1=315237&r2=315238&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Oct  9 13:06:37 2017
@@ -121,6 +121,8 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h,
 // Special builtins.
 
//===--===//
 BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
+BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
+BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
 
 
//===--===//
 // R600-NI only builtins.

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315238&r1=315237&r2=315238&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Oct  9 13:06:37 2017
@@ -9103,6 +9103,15 @@ Value *CodeGenFunction::EmitAMDGPUBuilti
 CI->setConvergent();
 return CI;
   }
+  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
+  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
+StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
+  "exec_lo" : "exec_hi";
+CallInst *CI = cast(
+  EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
+CI->setConvergent();
+return CI;
+  }
 
   // amdgcn workitem
   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=315238&r1=315237&r2=315238&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Oct  9 13:06:37 2017
@@ -421,6 +421,18 @@ void test_read_exec(global ulong* out) {
 
 // CHECK: declare i64 @llvm.read_register.i64(metadata) 
#[[NOUNWIND_READONLY:[0-9]+]]
 
+// CHECK-LABEL: @test_read_exec_lo(
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_LO:[0-9]+]]) 
#[[READ_EXEC_ATTRS]]
+void test_read_exec_lo(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_lo();
+}
+
+// CHECK-LABEL: @test_read_exec_hi(
+// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_HI:[0-9]+]]) 
#[[READ_EXEC_ATTRS]]
+void test_read_exec_hi(global uint* out) {
+  *out = __builtin_amdgcn_read_exec_hi();
+}
+
 // CHECK-LABEL: @test_dispatch_ptr
 // CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
 void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out)
@@ -499,3 +511,5 @@ void test_s_getpc(global ulong* out)
 // CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent }
 // CHECK-DAG: ![[EXEC]] = !{!"exec"}
+// CHECK-DAG: ![[EXEC_LO]] = !{!"exec_lo"}
+// CHECK-DAG: ![[EXEC_HI]] = !{!"exec_hi"}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r341033 - AMDGPU: Default to hidden visibility

2018-08-30 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Aug 30 01:18:06 2018
New Revision: 341033

URL: http://llvm.org/viewvc/llvm-project?rev=341033&view=rev
Log:
AMDGPU: Default to hidden visibility

Object linking isn't supported, so it's not useful
to emit default visibility. Default visibility requires
relocations we don't yet support for functions compiled
in another translation unit.

WebAssembly already does this, although they insert these
arguments in a different place for some reason.

Added:
cfe/trunk/test/Driver/amdgpu-visibility.cl
Modified:
cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp
cfe/trunk/lib/Driver/ToolChains/AMDGPU.h

Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp?rev=341033&r1=341032&r2=341033&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp Thu Aug 30 01:18:06 2018
@@ -98,3 +98,16 @@ AMDGPUToolChain::TranslateArgs(const Der
 
   return DAL;
 }
+
+void AMDGPUToolChain::addClangTargetOptions(
+const llvm::opt::ArgList &DriverArgs,
+llvm::opt::ArgStringList &CC1Args,
+Action::OffloadKind DeviceOffloadingKind) const {
+  // Default to "hidden" visibility, as object level linking will not be
+  // supported for the forseeable future.
+  if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
+ options::OPT_fvisibility_ms_compat)) {
+CC1Args.push_back("-fvisibility");
+CC1Args.push_back("hidden");
+  }
+}

Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.h?rev=341033&r1=341032&r2=341033&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/AMDGPU.h (original)
+++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.h Thu Aug 30 01:18:06 2018
@@ -61,6 +61,10 @@ public:
   llvm::opt::DerivedArgList *
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
 Action::OffloadKind DeviceOffloadKind) const override;
+
+  void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ Action::OffloadKind DeviceOffloadKind) const 
override;
 };
 
 } // end namespace toolchains

Added: cfe/trunk/test/Driver/amdgpu-visibility.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-visibility.cl?rev=341033&view=auto
==
--- cfe/trunk/test/Driver/amdgpu-visibility.cl (added)
+++ cfe/trunk/test/Driver/amdgpu-visibility.cl Thu Aug 30 01:18:06 2018
@@ -0,0 +1,7 @@
+// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm %s 2>&1 | 
FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm 
-fvisibility=protected  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED  %s
+// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm 
-fvisibility-ms-compat  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS  %s
+
+// DEFAULT: "-fvisibility" "hidden"
+// OVERRIDE-PROTECTED: "-fvisibility" "protected"
+// OVERRIDE-MS:  "-fvisibility" "hidden" "-ftype-visibility" "default"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r338707 - Try to make builtin address space declarations not useless

2018-08-02 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Aug  2 05:14:28 2018
New Revision: 338707

URL: http://llvm.org/viewvc/llvm-project?rev=338707&view=rev
Log:
Try to make builtin address space declarations not useless

The way address space declarations for builtins currently work
is nearly useless. The code assumes the address spaces used for
builtins is a confusingly named "target address space" from user
code using __attribute__((address_space(N))) that matches
the builtin declaration. There's no way to use this to declare
a builtin that returns a language specific address space.
The terminology used is highly confusing since it has nothing
to do with the address space selected by the target to use
for a language address space.

This feature is essentially unused as-is. AMDGPU and NVPTX
are the only in-tree targets attempting to use this. The AMDGPU
builtins certainly do not behave as intended (i.e. all of the
builtins returning pointers can never compile because the numbered
address space never matches the expected named address space).

The NVPTX builtins are missing tests for some, and the others
seem to rely on an implicit addrspacecast.

Change the used address space for builtins based on a target
hook to allow using a language address space for a builtin.
This allows the same builtin declaration to be used for multiple
languages with similarly purposed address spaces (e.g. the same
AMDGPU builtin can be used in OpenCL and CUDA even though the
constant address spaces are arbitrarily different).

This breaks the possibility of using arbitrary numbered
address spaces alongside the named address spaces for builtins.
If this is an issue we probably need to introduce another builtin
declaration character to distinguish language address spaces from
so-called "target address spaces".

Added:
cfe/trunk/test/CodeGenCUDA/builtins-amdgcn.cu
cfe/trunk/test/CodeGenOpenCL/numbered-address-space.cl
cfe/trunk/test/SemaOpenCL/numbered-address-space.cl
Modified:
cfe/trunk/include/clang/AST/ASTContext.h
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/include/clang/Basic/TargetInfo.h
cfe/trunk/lib/AST/ASTContext.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Sema/SemaExpr.cpp
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/AST/ASTContext.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTContext.h?rev=338707&r1=338706&r2=338707&view=diff
==
--- cfe/trunk/include/clang/AST/ASTContext.h (original)
+++ cfe/trunk/include/clang/AST/ASTContext.h Thu Aug  2 05:14:28 2018
@@ -2488,6 +2488,8 @@ public:
 
   unsigned getTargetAddressSpace(LangAS AS) const;
 
+  LangAS getLangASForBuiltinAddressSpace(unsigned AS) const;
+
   /// Get target-dependent integer value for null pointer which is used for
   /// constant folding.
   uint64_t getTargetNullPointerValue(QualType QT) const;

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=338707&r1=338706&r2=338707&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug  2 05:14:28 2018
@@ -21,9 +21,9 @@
 // SI+ only builtins.
 
//===--===//
 
-BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc")
-BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc")
-BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc")
+BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc")
+BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc")
 
 BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")
@@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v",
 BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
+
+// FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
 BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")
 BUILTIN(__builtin_amdgcn_div_fmas, "b", "nc")
@@ -93,9 +95,9 @@ BUILTIN(__builtin_amdgcn_ds_bpermute, "i
 BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc")
 BUILTIN(__builtin_amdgcn_readlane, "iii", "nc")
 BUILTIN(__builtin_amdgcn_fmed3f, "", "nc")
-BUILTIN(__builtin_amdgcn_ds_faddf, "ff*fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fminf, "ff*fIiIiIb", "n")
-BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
 
 
//===---

r338754 - AMDGPU: Fix missing declaration of queue ptr builtin

2018-08-02 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Aug  2 11:24:55 2018
New Revision: 338754

URL: http://llvm.org/viewvc/llvm-project?rev=338754&view=rev
Log:
AMDGPU: Fix missing declaration of queue ptr builtin

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=338754&r1=338753&r2=338754&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug  2 11:24:55 2018
@@ -24,6 +24,7 @@
 BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc")
 BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc")
 BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc")
+BUILTIN(__builtin_amdgcn_queue_ptr, "v*4", "nc")
 
 BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=338754&r1=338753&r2=338754&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Thu Aug  2 11:24:55 2018
@@ -462,6 +462,13 @@ void test_dispatch_ptr(__constant unsign
   *out = __builtin_amdgcn_dispatch_ptr();
 }
 
+// CHECK-LABEL: @test_queue_ptr
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+void test_queue_ptr(__constant unsigned char ** out)
+{
+  *out = __builtin_amdgcn_queue_ptr();
+}
+
 // CHECK-LABEL: @test_kernarg_segment_ptr
 // CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
 void test_kernarg_segment_ptr(__constant unsigned char ** out)


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r324641 - Fix crash on array initializer with non-0 alloca addrspace

2018-02-08 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Feb  8 11:37:09 2018
New Revision: 324641

URL: http://llvm.org/viewvc/llvm-project?rev=324641&view=rev
Log:
Fix crash on array initializer with non-0 alloca addrspace

Modified:
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl
cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=324641&r1=324640&r2=324641&view=diff
==
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Thu Feb  8 11:37:09 2018
@@ -1337,7 +1337,8 @@ void CodeGenFunction::EmitAutoVarInit(co
  isVolatile);
 // Zero and undef don't require a stores.
 if (!constant->isNullValue() && !isa(constant)) {
-  Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo());
+  Loc = Builder.CreateBitCast(Loc,
+constant->getType()->getPointerTo(Loc.getAddressSpace()));
   emitStoresForInitAfterMemset(constant, Loc.getPointer(),
isVolatile, Builder);
 }

Modified: cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl?rev=324641&r1=324640&r2=324641&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl 
(original)
+++ cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl Thu Feb 
 8 11:37:09 2018
@@ -20,3 +20,13 @@ __constant ConstantArrayPointerStruct co
 &constant_array_struct.f
 };
 
+__kernel void initializer_cast_is_valid_crash()
+{
+  unsigned char v512[64] = {
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+  };
+
+}

Modified: cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl?rev=324641&r1=324640&r2=324641&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl Thu Feb  8 
11:37:09 2018
@@ -1,9 +1,33 @@
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | 
FileCheck %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | 
FileCheck -check-prefix=PRIVATE0 %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-unknown -O0 -emit-llvm -o - | 
FileCheck -check-prefix=PRIVATE5 %s
 
 // CHECK: @test.arr = private unnamed_addr addrspace(2) constant [3 x i32] 
[i32 1, i32 2, i32 3], align 4
 
 void test() {
   __private int arr[] = {1, 2, 3};
-// CHECK:  %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
-// CHECK:  call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], 
i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 
addrspace(2)*), i32 12, i1 false)
+// PRIVATE0:  %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
+// PRIVATE0:  call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 
%[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* 
@test.arr to i8 addrspace(2)*), i32 12, i1 false)
+
+// PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5)
+// PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memcpy.p5i8.p2i8.i64(i8 addrspace(5)* align 4 %0, 
i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 
addrspace(2)*), i64 12, i1 false)
+}
+
+__kernel void initializer_cast_is_valid_crash() {
+// PRIVATE0: %v512 = alloca [64 x i8], align 1
+// PRIVATE0: %0 = bitcast [64 x i8]* %v512 to i8*
+// PRIVATE0: call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 64, i1 
false)
+// PRIVATE0: %1 = bitcast i8* %0 to [64 x i8]*
+
+
+// PRIVATE5: %v512 = alloca [64 x i8], align 1, addrspace(5)
+// PRIVATE5: %0 = bitcast [64 x i8] addrspace(5)* %v512 to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 1 %0, i8 
0, i64 64, i1 false)
+// PRIVATE5: %1 = bitcast i8 addrspace(5)* %0 to [64 x i8] addrspace(5)*
+  unsigned char v512[64] = {
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+  
0x

r324748 - AMDGPU: Update for datalayout change

2018-02-09 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Fri Feb  9 08:58:41 2018
New Revision: 324748

URL: http://llvm.org/viewvc/llvm-project?rev=324748&view=rev
Log:
AMDGPU: Update for datalayout change

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/test/CodeGen/target-data.c
cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=324748&r1=324747&r2=324748&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Fri Feb  9 08:58:41 2018
@@ -33,12 +33,12 @@ static const char *const DataLayoutStrin
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
 static const char *const DataLayoutStringSIPrivateIsZero =
-"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
 static const char *const DataLayoutStringSIGenericIsZero =
-"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
+"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
@@ -144,7 +144,7 @@ const char *const AMDGPUTargetInfo::GCCR
   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
-  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 
+  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
   "flat_scratch_lo", "flat_scratch_hi"
 };
 

Modified: cfe/trunk/test/CodeGen/target-data.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-data.c?rev=324748&r1=324747&r2=324748&view=diff
==
--- cfe/trunk/test/CodeGen/target-data.c (original)
+++ cfe/trunk/test/CodeGen/target-data.c Fri Feb  9 08:58:41 2018
@@ -132,12 +132,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // Test default -target-cpu
 // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
 // RUN: | FileCheck %s -check-prefix=R600SIDefault
-// R600SIDefault: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+// R600SIDefault: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 
 // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=AARCH64

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl?rev=324748&r1=324747&r2=324748&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Fri Feb  9 08:58:41 2018
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck 
%s
 
-// CHECK: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+// CHECK: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
 void foo(void) {}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r352443 - AMDGPU: Add ds append/consume builtins

2019-01-28 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Jan 28 15:59:18 2019
New Revision: 352443

URL: http://llvm.org/viewvc/llvm-project?rev=352443&view=rev
Log:
AMDGPU: Add ds append/consume builtins

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=352443&r1=352442&r2=352443&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Jan 28 15:59:18 2019
@@ -98,6 +98,8 @@ BUILTIN(__builtin_amdgcn_fmed3f, "",
 BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n")
 BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
+BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n")
+BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n")
 
 
//===--===//
 // CI+ only builtins.

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=352443&r1=352442&r2=352443&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jan 28 15:59:18 2019
@@ -12478,6 +12478,14 @@ Value *CodeGenFunction::EmitAMDGPUBuilti
   case AMDGPU::BI__builtin_amdgcn_fmed3f:
   case AMDGPU::BI__builtin_amdgcn_fmed3h:
 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
+  case AMDGPU::BI__builtin_amdgcn_ds_append:
+  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
+Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
+  Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
+Value *Src0 = EmitScalarExpr(E->getArg(0));
+Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
+return Builder.CreateCall(F, { Src0, Builder.getFalse() });
+  }
   case AMDGPU::BI__builtin_amdgcn_read_exec: {
 CallInst *CI = cast(
   EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=352443&r1=352442&r2=352443&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Jan 28 15:59:18 2019
@@ -536,6 +536,18 @@ void test_s_getpc(global ulong* out)
   *out = __builtin_amdgcn_s_getpc();
 }
 
+// CHECK-LABEL: @test_ds_append_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 
false)
+kernel void test_ds_append_lds(global int* out, local int* ptr) {
+  *out = __builtin_amdgcn_ds_append(ptr);
+}
+
+// CHECK-LABEL: @test_ds_consume_lds(
+// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 
false)
+kernel void test_ds_consume_lds(global int* out, local int* ptr) {
+  *out = __builtin_amdgcn_ds_consume(ptr);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: r350970 - [Darwin][Driver] Don't pass a file as object_path_lto during ThinLTO

2019-01-29 Thread Matt Arsenault via cfe-commits


> On Jan 11, 2019, at 4:16 PM, Steven Wu via cfe-commits 
>  wrote:
> 
> Author: steven_wu
> Date: Fri Jan 11 13:16:04 2019
> New Revision: 350970
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=350970&view=rev
> Log:
> [Darwin][Driver] Don't pass a file as object_path_lto during ThinLTO
> 
> Summary:
> After r327851, Driver::GetTemporaryPath will create the file rather than
> just create a potentially unique filename. If clang driver passes the
> file as parameter as -object_path_lto, ld64 will pass it back to libLTO
> as GeneratedObjectsDirectory, which is going to cause a LLVM ERROR if it
> is not a directory.
> Now during thinLTO, pass a temp directory path to linker instead.
> 
> rdar://problem/47194182
> 
> Reviewers: arphaman, dexonsmith
> 
> Reviewed By: arphaman
> 
> Subscribers: mehdi_amini, inglorion, jkorous, cfe-commits
> 
> Differential Revision: https://reviews.llvm.org/D56608
> 
> Modified:
>cfe/trunk/include/clang/Driver/Driver.h
>cfe/trunk/lib/Driver/Driver.cpp
>cfe/trunk/lib/Driver/ToolChains/Darwin.cpp
>cfe/trunk/test/Driver/darwin-ld-lto.c
> 
> Modified: cfe/trunk/include/clang/Driver/Driver.h
> URL: 
> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Driver.h?rev=350970&r1=350969&r2=350970&view=diff
> ==
> --- cfe/trunk/include/clang/Driver/Driver.h (original)
> +++ cfe/trunk/include/clang/Driver/Driver.h Fri Jan 11 13:16:04 2019
> @@ -505,6 +505,10 @@ public:
>   /// GCC goes to extra lengths here to be a bit more robust.
>   std::string GetTemporaryPath(StringRef Prefix, StringRef Suffix) const;
> 
> +  /// GetTemporaryDirectory - Return the pathname of a temporary directory to
> +  /// use as part of compilation; the directory will have the given prefix.
> +  std::string GetTemporaryDirectory(StringRef Prefix) const;
> +
>   /// Return the pathname of the pch file in clang-cl mode.
>   std::string GetClPchPath(Compilation &C, StringRef BaseName) const;
> 
> 
> Modified: cfe/trunk/lib/Driver/Driver.cpp
> URL: 
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=350970&r1=350969&r2=350970&view=diff
> ==
> --- cfe/trunk/lib/Driver/Driver.cpp (original)
> +++ cfe/trunk/lib/Driver/Driver.cpp Fri Jan 11 13:16:04 2019
> @@ -4478,6 +4478,17 @@ std::string Driver::GetTemporaryPath(Str
>   return Path.str();
> }
> 
> +std::string Driver::GetTemporaryDirectory(StringRef Prefix) const {
> +  SmallString<128> Path;
> +  std::error_code EC = llvm::sys::fs::createUniqueDirectory(Prefix, Path);
> +  if (EC) {
> +Diag(clang::diag::err_unable_to_make_temp) << EC.message();
> +return "";
> +  }
> +
> +  return Path.str();
> +}
> +
> std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
>   SmallString<128> Output;
>   if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) {
> 
> Modified: cfe/trunk/lib/Driver/ToolChains/Darwin.cpp
> URL: 
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Darwin.cpp?rev=350970&r1=350969&r2=350970&view=diff
> ==
> --- cfe/trunk/lib/Driver/ToolChains/Darwin.cpp (original)
> +++ cfe/trunk/lib/Driver/ToolChains/Darwin.cpp Fri Jan 11 13:16:04 2019
> @@ -224,13 +224,20 @@ void darwin::Linker::AddLinkArgs(Compila
>options::OPT_fno_application_extension, false))
> CmdArgs.push_back("-application_extension");
> 
> -  if (D.isUsingLTO()) {
> -// If we are using LTO, then automatically create a temporary file path 
> for
> -// the linker to use, so that it's lifetime will extend past a possible
> -// dsymutil step.
> -if (Version[0] >= 116 && NeedsTempPath(Inputs)) {
> -  const char *TmpPath = C.getArgs().MakeArgString(
> -  D.GetTemporaryPath("cc", 
> types::getTypeTempSuffix(types::TY_Object)));
> +  if (D.isUsingLTO() && Version[0] >= 116 && NeedsTempPath(Inputs)) {
> +std::string TmpPathName;
> +if (D.getLTOMode() == LTOK_Full) {
> +  // If we are using full LTO, then automatically create a temporary file
> +  // path for the linker to use, so that it's lifetime will extend past a
> +  // possible dsymutil step.
> +  TmpPathName =
> +  D.GetTemporaryPath("cc", 
> types::getTypeTempSuffix(types::TY_Object));
> +} else if (D.getLTOMode() == LTOK_Thin)
> +  // If we are using thin LTO, then create a directory instead.
> +  TmpPathName = D.GetTemporaryDirectory("thinlto");
> +
> +if (!TmpPathName.empty()) {
> +  auto *TmpPath = C.getArgs().MakeArgString(TmpPathName);
>   C.addTempFile(TmpPath);
>   CmdArgs.push_back("-object_path_lto");
>   CmdArgs.push_back(TmpPath);
> 
> Modified: cfe/trunk/test/Driver/darwin-ld-lto.c
> URL: 
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/darwin-ld-lto.c?rev=350970&r1=350969

r352539 - Revert "OpenCL: Extend argument promotion rules to vector types"

2019-01-29 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jan 29 12:49:47 2019
New Revision: 352539

URL: http://llvm.org/viewvc/llvm-project?rev=352539&view=rev
Log:
Revert "OpenCL: Extend argument promotion rules to vector types"

This reverts r348083. This was based on a misreading of the spec
for printf specifiers.

Also revert r343653, as without a subsequent patch, a correctly
specified format for a vector will incorrectly warn.

Fixes bug 40491.

Modified:
cfe/trunk/lib/Headers/opencl-c.h
cfe/trunk/lib/Sema/SemaExpr.cpp
cfe/trunk/test/CodeGenOpenCL/printf.cl
cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl
cfe/trunk/test/SemaOpenCL/printf-format-strings.cl

Modified: cfe/trunk/lib/Headers/opencl-c.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/opencl-c.h?rev=352539&r1=352538&r2=352539&view=diff
==
--- cfe/trunk/lib/Headers/opencl-c.h (original)
+++ cfe/trunk/lib/Headers/opencl-c.h Tue Jan 29 12:49:47 2019
@@ -14469,7 +14469,7 @@ half16 __ovld __cnfn shuffle2(half16 x,
 #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
 
-int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
+int printf(__constant const char* st, ...);
 #endif
 
 // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write 
Functions

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=352539&r1=352538&r2=352539&view=diff
==
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Jan 29 12:49:47 2019
@@ -737,33 +737,20 @@ ExprResult Sema::DefaultArgumentPromotio
 return ExprError();
   E = Res.get();
 
-  QualType ScalarTy = Ty;
-  unsigned NumElts = 0;
-  if (const ExtVectorType *VecTy = Ty->getAs<ExtVectorType>()) {
-NumElts = VecTy->getNumElements();
-ScalarTy = VecTy->getElementType();
-  }
-
   // If this is a 'float'  or '__fp16' (CVR qualified or typedef)
   // promote to double.
   // Note that default argument promotion applies only to float (and
   // half/fp16); it does not apply to _Float16.
-  const BuiltinType *BTy = ScalarTy->getAs<BuiltinType>();
+  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
   if (BTy && (BTy->getKind() == BuiltinType::Half ||
   BTy->getKind() == BuiltinType::Float)) {
 if (getLangOpts().OpenCL &&
 !getOpenCLOptions().isEnabled("cl_khr_fp64")) {
-  if (BTy->getKind() == BuiltinType::Half) {
-QualType Ty = Context.FloatTy;
-if (NumElts != 0)
-  Ty = Context.getExtVectorType(Ty, NumElts);
-E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
-  }
+if (BTy->getKind() == BuiltinType::Half) {
+E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
+}
 } else {
-  QualType Ty = Context.DoubleTy;
-  if (NumElts != 0)
-Ty = Context.getExtVectorType(Ty, NumElts);
-  E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
+  E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
 }
   }
 

Modified: cfe/trunk/test/CodeGenOpenCL/printf.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/printf.cl?rev=352539&r1=352538&r2=352539&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/printf.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/printf.cl Tue Jan 29 12:49:47 2019
@@ -12,28 +12,26 @@ int printf(__constant const char* st, ..
 
 
 // ALL-LABEL: @test_printf_float2(
-// FP64: %conv = fpext <2 x float> %0 to <2 x double>
-// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x double> %conv)
+// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x float> %0)
 
-// NOFP64:  call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x float> %0)
+
+// NOFP64:  call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x float> %0)
 kernel void test_printf_float2(float2 arg) {
-  printf("%v2f", arg);
+  printf("%v2hlf", arg);
 }
 
 // ALL-LABEL: @test_printf_half2(
-// FP64: %conv = fpext <2 x half> %0 to <2 x double>
-// FP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x double> %conv) #2
+// FP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.s

r352540 - OpenCL: Use length modifier for warning on vector printf arguments

2019-01-29 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jan 29 12:49:54 2019
New Revision: 352540

URL: http://llvm.org/viewvc/llvm-project?rev=352540&view=rev
Log:
OpenCL: Use length modifier for warning on vector printf arguments

Re-enable format string warnings on printf.

The warnings are still incomplete. Apparently it is undefined to use a
vector specifier without a length modifier, which is not currently
warned on. Additionally, type warnings appear to not be working with
the hh modifier, and aren't warning on all of the special restrictions
from c99 printf.

Modified:
cfe/trunk/include/clang/AST/FormatString.h
cfe/trunk/lib/AST/FormatString.cpp
cfe/trunk/lib/AST/PrintfFormatString.cpp
cfe/trunk/lib/AST/ScanfFormatString.cpp
cfe/trunk/lib/Headers/opencl-c.h
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/Sema/format-strings.c
cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl
cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl
cfe/trunk/test/SemaOpenCL/printf-format-strings.cl

Modified: cfe/trunk/include/clang/AST/FormatString.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=352540&r1=352539&r2=352540&view=diff
==
--- cfe/trunk/include/clang/AST/FormatString.h (original)
+++ cfe/trunk/include/clang/AST/FormatString.h Tue Jan 29 12:49:54 2019
@@ -67,6 +67,7 @@ public:
 None,
 AsChar,   // 'hh'
 AsShort,  // 'h'
+AsShortLong,  // 'hl' (OpenCL float/int vector element)
 AsLong,   // 'l'
 AsLongLong,   // 'll'
 AsQuad,   // 'q' (BSD, deprecated, for 64-bit integer types)
@@ -436,7 +437,8 @@ public:
 
   bool usesPositionalArg() const { return UsesPositionalArg; }
 
-  bool hasValidLengthModifier(const TargetInfo &Target) const;
+  bool hasValidLengthModifier(const TargetInfo &Target,
+  const LangOptions &LO) const;
 
   bool hasStandardLengthModifier() const;
 

Modified: cfe/trunk/lib/AST/FormatString.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=352540&r1=352539&r2=352540&view=diff
==
--- cfe/trunk/lib/AST/FormatString.cpp (original)
+++ cfe/trunk/lib/AST/FormatString.cpp Tue Jan 29 12:49:54 2019
@@ -223,6 +223,9 @@ clang::analyze_format_string::ParseLengt
   if (I != E && *I == 'h') {
 ++I;
 lmKind = LengthModifier::AsChar;
+  } else if (I != E && *I == 'l' && LO.OpenCL) {
+++I;
+lmKind = LengthModifier::AsShortLong;
   } else {
 lmKind = LengthModifier::AsShort;
   }
@@ -487,7 +490,8 @@ ArgType::matchesType(ASTContext &C, Qual
 }
 
 ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const {
-  if (K != SpecificTy) // Won't be a valid vector element type.
+  // Check for valid vector element types.
+  if (T.isNull())
 return ArgType::Invalid();
 
   QualType Vec = C.getExtVectorType(T, NumElts);
@@ -572,6 +576,8 @@ analyze_format_string::LengthModifier::t
 return "hh";
   case AsShort:
 return "h";
+  case AsShortLong:
+return "hl";
   case AsLong: // or AsWideChar
 return "l";
   case AsLongLong:
@@ -707,13 +713,18 @@ void OptionalAmount::toString(raw_ostrea
   }
 }
 
-bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target) const {
+bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target,
+ const LangOptions &LO) const {
   switch (LM.getKind()) {
 case LengthModifier::None:
   return true;
 
 // Handle most integer flags
 case LengthModifier::AsShort:
+  // Length modifier only applies to FP vectors.
+  if (LO.OpenCL && CS.isDoubleArg())
+return !VectorNumElts.isInvalid();
+
   if (Target.getTriple().isOSMSVCRT()) {
 switch (CS.getKind()) {
   case ConversionSpecifier::cArg:
@@ -752,8 +763,18 @@ bool FormatSpecifier::hasValidLengthModi
   return false;
   }
 
+case LengthModifier::AsShortLong:
+  return LO.OpenCL && !VectorNumElts.isInvalid();
+
 // Handle 'l' flag
 case LengthModifier::AsLong: // or AsWideChar
+  if (CS.isDoubleArg()) {
+// Invalid for OpenCL FP scalars.
+if (LO.OpenCL && VectorNumElts.isInvalid())
+  return false;
+return true;
+  }
+
   switch (CS.getKind()) {
 case ConversionSpecifier::dArg:
 case ConversionSpecifier::DArg:
@@ -764,14 +785,6 @@ bool FormatSpecifier::hasValidLengthModi
 case ConversionSpecifier::UArg:
 case ConversionSpecifier::xArg:
 case ConversionSpecifier::XArg:
-case ConversionSpecifier::aArg:
-case ConversionSpecifier::AArg:
-case ConversionSpecifier::fArg:
-case ConversionSpecifier::FArg:
-case ConversionSpecifier::eArg:
-case ConversionSpecifier::EArg:
-case 

r352544 - OpenCL: Try to fix bot test failure

2019-01-29 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jan 29 13:14:56 2019
New Revision: 352544

URL: http://llvm.org/viewvc/llvm-project?rev=352544&view=rev
Log:
OpenCL: Try to fix bot test failure

Modified:
cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl

Modified: cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl?rev=352544&r1=352543&r2=352544&view=diff
==
--- cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl (original)
+++ cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl Tue Jan 29 13:14:56 2019
@@ -3,6 +3,8 @@
 // RUN: %clang_cc1 -cl-std=CL1.2 -fsyntax-only -pedantic -Wall -Werror %t
 // RUN: %clang_cc1 -cl-std=CL1.2 -E -o - %t | FileCheck %s
 
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
 typedef __attribute__((ext_vector_type(4))) char char4;
 typedef __attribute__((ext_vector_type(4))) short short4;
 typedef __attribute__((ext_vector_type(4))) int int4;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r346806 - OpenCL: Don't warn on v printf modifier

2018-11-13 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Nov 13 14:30:35 2018
New Revision: 346806

URL: http://llvm.org/viewvc/llvm-project?rev=346806&view=rev
Log:
OpenCL: Don't warn on v printf modifier

This avoids spurious warnings, but could use
a lot of work. For example the number of vector
elements is not verified, and the passed
value type is not checked.

Fixes bug 39486

Added:
cfe/trunk/test/SemaOpenCL/printf-format-strings.cl
Modified:
cfe/trunk/include/clang/AST/FormatString.h
cfe/trunk/lib/AST/FormatString.cpp
cfe/trunk/lib/AST/PrintfFormatString.cpp
cfe/trunk/test/Sema/format-strings.c

Modified: cfe/trunk/include/clang/AST/FormatString.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=346806&r1=346805&r2=346806&view=diff
==
--- cfe/trunk/include/clang/AST/FormatString.h (original)
+++ cfe/trunk/include/clang/AST/FormatString.h Tue Nov 13 14:30:35 2018
@@ -166,6 +166,8 @@ public:
 
 ZArg, // MS extension
 
+VArg, // OpenCL vectors
+
 // Objective-C specific specifiers.
 ObjCObjArg, // '@'
 ObjCBeg = ObjCObjArg,

Modified: cfe/trunk/lib/AST/FormatString.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=346806&r1=346805&r2=346806&view=diff
==
--- cfe/trunk/lib/AST/FormatString.cpp (original)
+++ cfe/trunk/lib/AST/FormatString.cpp Tue Nov 13 14:30:35 2018
@@ -618,6 +618,9 @@ const char *ConversionSpecifier::toStrin
 
   // MS specific specifiers.
   case ZArg: return "Z";
+
+ // OpenCL specific specifiers.
+  case VArg: return "v";
   }
   return nullptr;
 }
@@ -875,6 +878,8 @@ bool FormatSpecifier::hasStandardConvers
 case ConversionSpecifier::CArg:
 case ConversionSpecifier::SArg:
   return LangOpt.ObjC;
+case ConversionSpecifier::VArg:
+  return LangOpt.OpenCL;
 case ConversionSpecifier::InvalidSpecifier:
 case ConversionSpecifier::FreeBSDbArg:
 case ConversionSpecifier::FreeBSDDArg:

Modified: cfe/trunk/lib/AST/PrintfFormatString.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/PrintfFormatString.cpp?rev=346806&r1=346805&r2=346806&view=diff
==
--- cfe/trunk/lib/AST/PrintfFormatString.cpp (original)
+++ cfe/trunk/lib/AST/PrintfFormatString.cpp Tue Nov 13 14:30:35 2018
@@ -362,6 +362,12 @@ static PrintfSpecifierResult ParsePrintf
 case 'Z':
   if (Target.getTriple().isOSMSVCRT())
 k = ConversionSpecifier::ZArg;
+  break;
+// OpenCL specific.
+case 'v':
+  if (LO.OpenCL)
+k = ConversionSpecifier::VArg;
+  break;
   }
 
   // Check to see if we used the Objective-C modifier flags with
@@ -1026,6 +1032,7 @@ bool PrintfSpecifier::hasValidPrecision(
   case ConversionSpecifier::FreeBSDrArg:
   case ConversionSpecifier::FreeBSDyArg:
   case ConversionSpecifier::PArg:
+  case ConversionSpecifier::VArg:
 return true;
 
   default:

Modified: cfe/trunk/test/Sema/format-strings.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings.c?rev=346806&r1=346805&r2=346806&view=diff
==
--- cfe/trunk/test/Sema/format-strings.c (original)
+++ cfe/trunk/test/Sema/format-strings.c Tue Nov 13 14:30:35 2018
@@ -613,6 +613,11 @@ void pr12761(char c) {
   printf("%hhx", c);
 }
 
+void test_opencl_vector_format(int x) {
+  printf("%v4d", x); // expected-warning{{invalid conversion specifier 'v'}}
+  printf("%vd", x); // expected-warning{{invalid conversion specifier 'v'}}
+  printf("%0vd", x); // expected-warning{{invalid conversion specifier 'v'}}
+}
 
 // Test that we correctly merge the format in both orders.
 extern void test14_foo(const char *, const char *, ...)

Added: cfe/trunk/test/SemaOpenCL/printf-format-strings.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/printf-format-strings.cl?rev=346806&view=auto
==
--- cfe/trunk/test/SemaOpenCL/printf-format-strings.cl (added)
+++ cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Tue Nov 13 14:30:35 2018
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -cl-std=CL1.2 -fsyntax-only -verify %s
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(4))) float float4;
+typedef __attribute__((ext_vector_type(4))) int int4;
+
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
+
+kernel void format_v4f32(float4 arg)
+{
+printf("%v4f\n", arg); // expected-no-diagnostics
+}
+
+kernel void format_v4f32_wrong_num_elts(float2 arg)
+{
+printf("%v4f\n", arg); // expected-no-diagnostics
+}
+
+kernel void vector_precision_modifier_v4f32(float4 arg)
+{
+printf("%.2v4f\n", arg); // expected-no-diagnostics
+}
+

r347873 - Mark __builtin_shufflevector as using custom type checking

2018-11-29 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Nov 29 07:45:05 2018
New Revision: 347873

URL: http://llvm.org/viewvc/llvm-project?rev=347873&view=rev
Log:
Mark __builtin_shufflevector as using custom type checking

The custom handling seems to all be implemented already.
This avoids regressions in a future patch when float vectors
are ordinarily promoted to double vectors in variadic calls.

Modified:
cfe/trunk/include/clang/Basic/Builtins.def

Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=347873&r1=347872&r2=347873&view=diff
==
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Thu Nov 29 07:45:05 2018
@@ -538,7 +538,7 @@ BUILTIN(__builtin_readcyclecounter, "ULL
 BUILTIN(__builtin_trap, "v", "nr")
 BUILTIN(__builtin_debugtrap, "v", "n")
 BUILTIN(__builtin_unreachable, "v", "nr")
-BUILTIN(__builtin_shufflevector, "v."   , "nc")
+BUILTIN(__builtin_shufflevector, "v."   , "nct")
 BUILTIN(__builtin_convertvector, "v."   , "nct")
 BUILTIN(__builtin_alloca, "v*z"   , "Fn")
 BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r348083 - OpenCL: Extend argument promotion rules to vector types

2018-12-01 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Sat Dec  1 13:56:10 2018
New Revision: 348083

URL: http://llvm.org/viewvc/llvm-project?rev=348083&view=rev
Log:
OpenCL: Extend argument promotion rules to vector types

The spec is ambiguous on whether vector types are allowed to be
implicitly converted. I think the only legal context in which this
can be used in OpenCL is printf, where it seems necessary.

Added:
cfe/trunk/test/CodeGenOpenCL/printf.cl
Modified:
cfe/trunk/lib/Sema/SemaExpr.cpp

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=348083&r1=348082&r2=348083&view=diff
==
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Sat Dec  1 13:56:10 2018
@@ -730,20 +730,33 @@ ExprResult Sema::DefaultArgumentPromotio
 return ExprError();
   E = Res.get();
 
+  QualType ScalarTy = Ty;
+  unsigned NumElts = 0;
+  if (const ExtVectorType *VecTy = Ty->getAs<ExtVectorType>()) {
+NumElts = VecTy->getNumElements();
+ScalarTy = VecTy->getElementType();
+  }
+
   // If this is a 'float'  or '__fp16' (CVR qualified or typedef)
   // promote to double.
   // Note that default argument promotion applies only to float (and
   // half/fp16); it does not apply to _Float16.
-  const BuiltinType *BTy = Ty->getAs<BuiltinType>();
+  const BuiltinType *BTy = ScalarTy->getAs<BuiltinType>();
   if (BTy && (BTy->getKind() == BuiltinType::Half ||
   BTy->getKind() == BuiltinType::Float)) {
 if (getLangOpts().OpenCL &&
 !getOpenCLOptions().isEnabled("cl_khr_fp64")) {
-if (BTy->getKind() == BuiltinType::Half) {
-E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
-}
+  if (BTy->getKind() == BuiltinType::Half) {
+QualType Ty = Context.FloatTy;
+if (NumElts != 0)
+  Ty = Context.getExtVectorType(Ty, NumElts);
+E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
+  }
 } else {
-  E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
+  QualType Ty = Context.DoubleTy;
+  if (NumElts != 0)
+Ty = Context.getExtVectorType(Ty, NumElts);
+  E = ImpCastExprToType(E, Ty, CK_FloatingCast).get();
 }
   }
 

Added: cfe/trunk/test/CodeGenOpenCL/printf.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/printf.cl?rev=348083&view=auto
==
--- cfe/trunk/test/CodeGenOpenCL/printf.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/printf.cl Sat Dec  1 13:56:10 2018
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple 
spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
-check-prefixes=FP64,ALL %s
+// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple 
spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck 
-check-prefixes=NOFP64,ALL %s
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(2))) half half2;
+
+#ifdef cl_khr_fp64
+typedef __attribute__((ext_vector_type(2))) double double2;
+#endif
+
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
+
+
+// ALL-LABEL: @test_printf_float2(
+// FP64: %conv = fpext <2 x float> %0 to <2 x double>
+// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x double> %conv)
+
+// NOFP64:  call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x float> %0)
+kernel void test_printf_float2(float2 arg) {
+  printf("%v2f", arg);
+}
+
+// ALL-LABEL: @test_printf_half2(
+// FP64: %conv = fpext <2 x half> %0 to <2 x double>
+// FP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x double> %conv) #2
+
+// NOFP64: %conv = fpext <2 x half> %0 to <2 x float>
+// NOFP64:  %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 
addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, 
i32 0, i32 0), <2 x float> %conv) #2
+kernel void test_printf_half2(half2 arg) {
+  printf("%v2f", arg);
+}
+
+#ifdef cl_khr_fp64
+// FP64-LABEL: @test_printf_double2(
+// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* 
getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), 
<2 x double> %0) #2
+kernel void test_printf_double2(double2 arg) {
+  printf("%v2f", arg);
+}
+#endif


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r348084 - OpenCL: Improve vector printf warnings

2018-12-01 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Sat Dec  1 14:16:27 2018
New Revision: 348084

URL: http://llvm.org/viewvc/llvm-project?rev=348084&view=rev
Log:
OpenCL: Improve vector printf warnings

The vector modifier is considered separate, so
don't treat it as a conversion specifier.

This is still not warning on some cases, like
using a type that isn't a valid vector element.

Fixes bug 39652

Added:
cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl
Modified:
cfe/trunk/include/clang/AST/FormatString.h
cfe/trunk/lib/AST/FormatString.cpp
cfe/trunk/lib/AST/FormatStringParsing.h
cfe/trunk/lib/AST/PrintfFormatString.cpp
cfe/trunk/test/SemaOpenCL/printf-format-strings.cl

Modified: cfe/trunk/include/clang/AST/FormatString.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=348084&r1=348083&r2=348084&view=diff
==
--- cfe/trunk/include/clang/AST/FormatString.h (original)
+++ cfe/trunk/include/clang/AST/FormatString.h Sat Dec  1 14:16:27 2018
@@ -166,8 +166,6 @@ public:
 
 ZArg, // MS extension
 
-VArg, // OpenCL vectors
-
 // Objective-C specific specifiers.
 ObjCObjArg, // '@'
 ObjCBeg = ObjCObjArg,
@@ -305,6 +303,8 @@ public:
 
   QualType getRepresentativeType(ASTContext &C) const;
 
+  ArgType makeVectorType(ASTContext &C, unsigned NumElts) const;
+
   std::string getRepresentativeTypeName(ASTContext &C) const;
 };
 
@@ -324,6 +324,10 @@ public:
   : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
   UsesPositionalArg(0), UsesDotPrefix(0) {}
 
+  explicit OptionalAmount(unsigned Amount)
+: start(nullptr), length(0), hs(Constant), amt(Amount),
+UsesPositionalArg(false), UsesDotPrefix(false) {}
+
   bool isInvalid() const {
 return hs == Invalid;
   }
@@ -381,6 +385,8 @@ protected:
   LengthModifier LM;
   OptionalAmount FieldWidth;
   ConversionSpecifier CS;
+  OptionalAmount VectorNumElts;
+
   /// Positional arguments, an IEEE extension:
   ///  IEEE Std 1003.1, 2004 Edition
   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
@@ -388,7 +394,8 @@ protected:
   unsigned argIndex;
 public:
   FormatSpecifier(bool isPrintf)
-: CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
+: CS(isPrintf), VectorNumElts(false),
+  UsesPositionalArg(false), argIndex(0) {}
 
   void setLengthModifier(LengthModifier lm) {
 LM = lm;
@@ -416,6 +423,14 @@ public:
 return FieldWidth;
   }
 
+  void setVectorNumElts(const OptionalAmount &Amt) {
+VectorNumElts = Amt;
+  }
+
+  const OptionalAmount &getVectorNumElts() const {
+return VectorNumElts;
+  }
+
   void setFieldWidth(const OptionalAmount &Amt) {
 FieldWidth = Amt;
   }
@@ -480,6 +495,9 @@ class PrintfSpecifier : public analyze_f
   OptionalFlag IsSensitive;  // '{sensitive}'
   OptionalAmount Precision;
   StringRef MaskType;
+
+  ArgType getScalarArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
+
 public:
   PrintfSpecifier()
   : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"),

Modified: cfe/trunk/lib/AST/FormatString.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=348084&r1=348083&r2=348084&view=diff
==
--- cfe/trunk/lib/AST/FormatString.cpp (original)
+++ cfe/trunk/lib/AST/FormatString.cpp Sat Dec  1 14:16:27 2018
@@ -179,6 +179,36 @@ clang::analyze_format_string::ParseArgPo
 }
 
 bool
+clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H,
+  FormatSpecifier &FS,
+  const char *&I,
+  const char *E,
+  const LangOptions &LO) {
+  if (!LO.OpenCL)
+return false;
+
+  const char *Start = I;
+  if (*I == 'v') {
+++I;
+
+if (I == E) {
+  H.HandleIncompleteSpecifier(Start, E - Start);
+  return true;
+}
+
+OptionalAmount NumElts = ParseAmount(I, E);
+if (NumElts.getHowSpecified() != OptionalAmount::Constant) {
+  H.HandleIncompleteSpecifier(Start, E - Start);
+  return true;
+}
+
+FS.setVectorNumElts(NumElts);
+  }
+
+  return false;
+}
+
+bool
 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
   const char *&I,
   const char *E,
@@ -457,6 +487,14 @@ ArgType::matchesType(ASTContext &C, Qual
   llvm_unreachable("Invalid ArgType Kind!");
 }
 
+ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const {
+  if (K != SpecificTy) // Won't be a valid vector element type.
+return ArgType::Invalid();
+
+  QualType Vec = C.getExtVectorType(T, NumElts);
+  return ArgType(Vec, Name);
+}
+
 QualType ArgType::getRepresentativeType(A

r348809 - Update test for instcombine change

2018-12-10 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Dec 10 15:02:40 2018
New Revision: 348809

URL: http://llvm.org/viewvc/llvm-project?rev=348809&view=rev
Log:
Update test for instcombine change

Modified:
cfe/trunk/test/CodeGen/vector.c

Modified: cfe/trunk/test/CodeGen/vector.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/vector.c?rev=348809&r1=348808&r2=348809&view=diff
==
--- cfe/trunk/test/CodeGen/vector.c (original)
+++ cfe/trunk/test/CodeGen/vector.c Mon Dec 10 15:02:40 2018
@@ -70,7 +70,7 @@ vec_int1 lax_vector_compare1(int x, vec_
 }
 
 // CHECK: define i32 @lax_vector_compare1(i32 {{.*}}, i32 {{.*}})
-// CHECK: icmp eq <1 x i32>
+// CHECK: icmp eq i32
 
 typedef int vec_int2 __attribute__((vector_size(8)));
 vec_int2 lax_vector_compare2(long long x, vec_int2 y) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r356354 - Add testcase from bug 41079

2019-03-17 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Sun Mar 17 16:16:31 2019
New Revision: 356354

URL: http://llvm.org/viewvc/llvm-project?rev=356354&view=rev
Log:
Add testcase from bug 41079

Modified:
cfe/trunk/test/CodeGen/builtin-expect.c

Modified: cfe/trunk/test/CodeGen/builtin-expect.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-expect.c?rev=356354&r1=356353&r2=356354&view=diff
==
--- cfe/trunk/test/CodeGen/builtin-expect.c (original)
+++ cfe/trunk/test/CodeGen/builtin-expect.c Sun Mar 17 16:16:31 2019
@@ -78,3 +78,20 @@ int switch_cond(int x) {
   return 0;
 }
 
+int variable_expected(int stuff) {
+// ALL-LABEL: define i32 @variable_expected(
+// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 {{%.*}})
+// O0-NOT: @llvm.expect
+
+  int res = 0;
+
+  switch (__builtin_expect(stuff, stuff)) {
+  case 0:
+res = 1;
+break;
+  default:
+break;
+  }
+
+  return res;
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r359918 - Ensure there is stack usage in stack size warning test

2019-05-03 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Fri May  3 12:04:14 2019
New Revision: 359918

URL: http://llvm.org/viewvc/llvm-project?rev=359918&view=rev
Log:
Ensure there is stack usage in stack size warning test

r359906 broke this because the only stack usage was from a spill which
can be avoided since the only block is a return.

Modified:
cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp

Modified: cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp?rev=359918&r1=359917&r2=359918&view=diff
==
--- cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp (original)
+++ cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp Fri May  3 
12:04:14 2019
@@ -14,5 +14,7 @@ namespace frameSizeThunkWarning {
 
   // CHECK: warning: stack frame size of {{[0-9]+}} bytes in function 
'frameSizeThunkWarning::B::f'
   // CHECK: warning: stack size limit exceeded ({{[0-9]+}}) in {{[^ ]+}}
-  void B::f() { }
+  void B::f() {
+volatile int x = 0; // Ensure there is stack usage.
+  }
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r343653 - OpenCL: Mark printf format string argument

2018-10-02 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Oct  2 19:01:19 2018
New Revision: 343653

URL: http://llvm.org/viewvc/llvm-project?rev=343653&view=rev
Log:
OpenCL: Mark printf format string argument

Fixes not warning on format string errors.

Added:
cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl
Modified:
cfe/trunk/lib/Headers/opencl-c.h

Modified: cfe/trunk/lib/Headers/opencl-c.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/opencl-c.h?rev=343653&r1=343652&r2=343653&view=diff
==
--- cfe/trunk/lib/Headers/opencl-c.h (original)
+++ cfe/trunk/lib/Headers/opencl-c.h Tue Oct  2 19:01:19 2018
@@ -14462,7 +14462,7 @@ half16 __ovld __cnfn shuffle2(half16 x,
 #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
 
-int printf(__constant const char* st, ...);
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 
2)));
 #endif
 
 // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write 
Functions

Added: cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl?rev=343653&view=auto
==
--- cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl (added)
+++ cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl Tue Oct  2 
19:01:19 2018
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 
-finclude-default-header
+
+// Make sure warnings are produced based on printf format strings.
+
+
+kernel void format_string_warnings(__constant char* arg) {
+
+  printf("%d", arg); // expected-warning {{format specifies type 'int' but the 
argument has type '__constant char *'}}
+
+  printf("not enough arguments %d %d", 4); // expected-warning {{more '%' 
conversions than data arguments}}
+
+  printf("too many arguments", 4); // expected-warning {{data argument not 
used by format string}}
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r354624 - AMDGPU: Don't emit debugger subtarget features

2019-02-21 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Feb 21 13:31:43 2019
New Revision: 354624

URL: http://llvm.org/viewvc/llvm-project?rev=354624&view=rev
Log:
AMDGPU: Don't emit debugger subtarget features

Keep the flag around for compatibility.

Modified:
cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp
cfe/trunk/test/Driver/amdgpu-features.c

Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp?rev=354624&r1=354623&r2=354624&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp Thu Feb 21 13:31:43 2019
@@ -38,15 +38,8 @@ void amdgpu::Linker::ConstructJob(Compil
 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
  const llvm::opt::ArgList &Args,
  std::vector<StringRef> &Features) {
-  if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) {
-StringRef value = dAbi->getValue();
-if (value == "1.0") {
-  Features.push_back("+amdgpu-debugger-insert-nops");
-  Features.push_back("+amdgpu-debugger-emit-prologue");
-} else {
-  D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
-}
-  }
+  if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
+D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
 
   handleTargetFeaturesGroup(
 Args, Features, options::OPT_m_amdgpu_Features_Group);

Modified: cfe/trunk/test/Driver/amdgpu-features.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-features.c?rev=354624&r1=354623&r2=354624&view=diff
==
--- cfe/trunk/test/Driver/amdgpu-features.c (original)
+++ cfe/trunk/test/Driver/amdgpu-features.c Thu Feb 21 13:31:43 2019
@@ -4,7 +4,7 @@
 
 // RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=kaveri 
-mamdgpu-debugger-abi=1.0 %s -o - 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-MAMDGPU-DEBUGGER-ABI-1-0 %s
-// CHECK-MAMDGPU-DEBUGGER-ABI-1-0: "-target-feature" 
"+amdgpu-debugger-insert-nops" "-target-feature" 
"+amdgpu-debugger-emit-prologue"
+// CHECK-MAMDGPU-DEBUGGER-ABI-1-0: the clang compiler does not support 
'-mamdgpu-debugger-abi=1.0'
 
 // RUN: %clang -### -target amdgcn -mcpu=gfx700 -mcode-object-v3 %s 2>&1 | 
FileCheck --check-prefix=CODE-OBJECT-V3 %s
 // CODE-OBJECT-V3: "-target-feature" "+code-object-v3"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r363390 - AMDGPU: Use AMDGPU toolchain for other OSes

2019-06-14 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Fri Jun 14 06:39:57 2019
New Revision: 363390

URL: http://llvm.org/viewvc/llvm-project?rev=363390&view=rev
Log:
AMDGPU: Use AMDGPU toolchain for other OSes

This would need more work to actually support them, but this is less
wrong than the default.

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/amdgpu-toolchain.c

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=363390&r1=363389&r2=363390&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri Jun 14 06:39:57 2019
@@ -4617,6 +4617,8 @@ const ToolChain &Driver::getToolChain(co
   TC = llvm::make_unique(*this, Target, Args);
   break;
 case llvm::Triple::AMDHSA:
+case llvm::Triple::AMDPAL:
+case llvm::Triple::Mesa3D:
   TC = llvm::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
   break;
 case llvm::Triple::Win32:

Modified: cfe/trunk/test/Driver/amdgpu-toolchain.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-toolchain.c?rev=363390&r1=363389&r2=363390&view=diff
==
--- cfe/trunk/test/Driver/amdgpu-toolchain.c (original)
+++ cfe/trunk/test/Driver/amdgpu-toolchain.c Fri Jun 14 06:39:57 2019
@@ -1,6 +1,11 @@
 // RUN: %clang -### -target amdgcn--amdhsa -x assembler -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g -target amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck 
-check-prefix=DWARF_VER %s
+// RUN: %clang -### -target amdgcn-amd-amdpal -x assembler -mcpu=kaveri %s 
2>&1 | FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g -target amdgcn-amd-amdpal -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x assembler -mcpu=kaveri %s 
2>&1 | FileCheck -check-prefix=AS_LINK %s
+// RUN: %clang -### -g -target amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | 
FileCheck -check-prefix=DWARF_VER %s
+
 // AS_LINK: clang{{.*}} "-cc1as"
 // AS_LINK: ld.lld{{.*}} "-shared"
 
-// RUN: %clang -### -g -target amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck 
-check-prefix=DWARF_VER %s
 // DWARF_VER: "-dwarf-version=5"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r363682 - AMDGPU: Disable errno by default

2019-06-18 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jun 18 06:59:32 2019
New Revision: 363682

URL: http://llvm.org/viewvc/llvm-project?rev=363682&view=rev
Log:
AMDGPU: Disable errno by default

Modified:
cfe/trunk/lib/Driver/ToolChains/AMDGPU.h
cfe/trunk/test/Driver/fast-math.c

Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.h?rev=363682&r1=363681&r2=363682&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/AMDGPU.h (original)
+++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.h Tue Jun 18 06:59:32 2019
@@ -57,6 +57,8 @@ public:
   const llvm::opt::ArgList &Args);
   unsigned GetDefaultDwarfVersion() const override { return 5; }
   bool IsIntegratedAssemblerDefault() const override { return true; }
+  bool IsMathErrnoDefault() const override { return false; }
+
   llvm::opt::DerivedArgList *
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
 Action::OffloadKind DeviceOffloadKind) const override;

Modified: cfe/trunk/test/Driver/fast-math.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/fast-math.c?rev=363682&r1=363681&r2=363682&view=diff
==
--- cfe/trunk/test/Driver/fast-math.c (original)
+++ cfe/trunk/test/Driver/fast-math.c Tue Jun 18 06:59:32 2019
@@ -97,6 +97,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s
 // RUN: %clang -### -target x86_64-linux-android -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s
+// RUN: %clang -### -target amdgcn-amd-amdhsa -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s
+// RUN: %clang -### -target amdgcn-amd-amdpal -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -c %s 2>&1   \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s
 //
 // Check that -ffast-math disables -fmath-errno, and -fno-fast-math merely
 // preserves the target default. Also check various flag set operations between


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r363684 - AMDGPU: Add GWS instruction builtins

2019-06-18 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jun 18 07:10:01 2019
New Revision: 363684

URL: http://llvm.org/viewvc/llvm-project?rev=363684&view=rev
Log:
AMDGPU: Add GWS instruction builtins

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363684&r1=363683&r2=363684&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Jun 18 07:10:01 2019
@@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v",
 BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
 
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363684&r1=363683&r2=363684&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Tue Jun 18 07:10:01 2019
@@ -548,6 +548,18 @@ kernel void test_ds_consume_lds(global i
   *out = __builtin_amdgcn_ds_consume(ptr);
 }
 
+// CHECK-LABEL: @test_gws_init(
+// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
+kernel void test_gws_init(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_init(value, id);
+}
+
+// CHECK-LABEL: @test_gws_barrier(
+// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
+kernel void test_gws_barrier(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_barrier(value, id);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r363871 - Reapply "r363684: AMDGPU: Add GWS instruction builtins"

2019-06-19 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Wed Jun 19 12:55:49 2019
New Revision: 363871

URL: http://llvm.org/viewvc/llvm-project?rev=363871&view=rev
Log:
Reapply "r363684: AMDGPU: Add GWS instruction builtins"

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363871&r1=363870&r2=363871&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Wed Jun 19 12:55:49 2019
@@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v",
 BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n")
 BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
 
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363871&r1=363870&r2=363871&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Wed Jun 19 12:55:49 2019
@@ -548,6 +548,18 @@ kernel void test_ds_consume_lds(global i
   *out = __builtin_amdgcn_ds_consume(ptr);
 }
 
+// CHECK-LABEL: @test_gws_init(
+// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id)
+kernel void test_gws_init(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_init(value, id);
+}
+
+// CHECK-LABEL: @test_gws_barrier(
+// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id)
+kernel void test_gws_barrier(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_barrier(value, id);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r366286 - AMDGPU: Add some missing builtins

2019-07-16 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jul 16 17:01:03 2019
New Revision: 366286

URL: http://llvm.org/viewvc/llvm-project?rev=366286&view=rev
Log:
AMDGPU: Add some missing builtins

Added:
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=366286&r1=366285&r2=366286&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Jul 16 17:01:03 2019
@@ -108,6 +108,16 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3
 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
 BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n")
 BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n")
+BUILTIN(__builtin_amdgcn_alignbit, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
 
 
//===--===//
 // CI+ only builtins.
@@ -163,6 +173,13 @@ TARGET_BUILTIN(__builtin_amdgcn_sdot8, "
 TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts")
 
 
//===--===//
+// GFX10+ only builtins.
+//===--===//
+TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", 
"gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", 
"gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
+
+//===--===//
 // Special builtins.
 
//===--===//
 BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=366286&r1=366285&r2=366286&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Jul 16 17:01:03 2019
@@ -12679,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuilti
 
   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
+  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
+return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
 llvm::SmallVector Args;
@@ -12744,6 +12746,10 @@ Value *CodeGenFunction::EmitAMDGPUBuilti
 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
   case AMDGPU::BI__builtin_amdgcn_lerp:
 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
+  case AMDGPU::BI__builtin_amdgcn_ubfe:
+return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
+  case AMDGPU::BI__builtin_amdgcn_sbfe:
+return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:
   case AMDGPU::BI__builtin_amdgcn_sicmp:

Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl?rev=366286&view=auto
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl Tue Jul 16 17:01:03 
2019
@@ -0,0 +1,24 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S 
-emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_permlane16(
+// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 
true, i1 true)
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d)

r367431 - AMDGPU: Add missing builtin declarations

2019-07-31 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Wed Jul 31 07:03:05 2019
New Revision: 367431

URL: http://llvm.org/viewvc/llvm-project?rev=367431&view=rev
Log:
AMDGPU: Add missing builtin declarations

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=367431&r1=367430&r2=367431&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Wed Jul 31 07:03:05 2019
@@ -118,6 +118,13 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16,
 BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
 BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
+BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
+BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
+BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc")
 
 
//===--===//
 // CI+ only builtins.

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=367431&r1=367430&r2=367431&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Wed Jul 31 07:03:05 2019
@@ -9,6 +9,7 @@ typedef unsigned short ushort;
 typedef half __attribute__((ext_vector_type(2))) half2;
 typedef short __attribute__((ext_vector_type(2))) short2;
 typedef ushort __attribute__((ext_vector_type(2))) ushort2;
+typedef uint __attribute__((ext_vector_type(4))) uint4;
 
 // CHECK-LABEL: @test_div_scale_f64
 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, 
i1 true)
@@ -654,6 +655,48 @@ kernel void test_cvt_pk_u8_f32(global ui
   *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
 }
 
+// CHECK-LABEL: @test_sad_u8(
+// CHECK: tail call i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_msad_u8(
+// CHECK: call i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_msad_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_sad_hi_u8(
+// CHECK: call i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sad_u16(
+// CHECK: call i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sad_u16(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_qsad_pk_u16_u8(
+// CHECK: call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2)
+kernel void test_qsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, 
ulong src2) {
+  *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_mqsad_pk_u16_u8(
+// CHECK: call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 
%src2)
+kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, 
ulong src2) {
+  *out = __builtin_amdgcn_mqsad_pk_u16_u8(src0, src1, src2);
+}
+
+// CHECK-LABEL: test_mqsad_u32_u8(
+// CHECK: call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src0, i32 %src1, <4 x 
i32> %src2)
+kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 
src2) {
+  *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r367973 - Builtins: Start adding half versions of math builtins

2019-08-05 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Aug  5 20:28:37 2019
New Revision: 367973

URL: http://llvm.org/viewvc/llvm-project?rev=367973&view=rev
Log:
Builtins: Start adding half versions of math builtins

The implementation of the OpenCL builtin library currently uses 2
different hacks to get to the corresponding IR intrinsics from the
source. This will allow removal of those.

This is the set that is currently used (minus a few vector ones).

Added:
cfe/trunk/test/CodeGenOpenCL/builtins-f16.cl
Modified:
cfe/trunk/include/clang/Basic/Builtins.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=367973&r1=367972&r2=367973&view=diff
==
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Mon Aug  5 20:28:37 2019
@@ -113,14 +113,17 @@ BUILTIN(__builtin_atan2l, "LdLdLd", "Fne
 BUILTIN(__builtin_abs  , "ii"  , "ncF")
 BUILTIN(__builtin_copysign, "ddd", "ncF")
 BUILTIN(__builtin_copysignf, "fff", "ncF")
+BUILTIN(__builtin_copysignf16, "hhh", "ncF")
 BUILTIN(__builtin_copysignl, "LdLdLd", "ncF")
 BUILTIN(__builtin_copysignf128, "LLdLLdLLd", "ncF")
 BUILTIN(__builtin_fabs , "dd"  , "ncF")
 BUILTIN(__builtin_fabsf, "ff"  , "ncF")
 BUILTIN(__builtin_fabsl, "LdLd", "ncF")
+BUILTIN(__builtin_fabsf16, "hh"  , "ncF")
 BUILTIN(__builtin_fabsf128, "LLdLLd", "ncF")
 BUILTIN(__builtin_fmod , "ddd"  , "Fne")
 BUILTIN(__builtin_fmodf, "fff"  , "Fne")
+BUILTIN(__builtin_fmodf16, "hhh"  , "Fne")
 BUILTIN(__builtin_fmodl, "LdLdLd", "Fne")
 BUILTIN(__builtin_frexp , "ddi*"  , "Fn")
 BUILTIN(__builtin_frexpf, "ffi*"  , "Fn")
@@ -154,6 +157,7 @@ BUILTIN(__builtin_powif, "ffi"  , "Fnc")
 BUILTIN(__builtin_powil, "LdLdi", "Fnc")
 BUILTIN(__builtin_pow , "ddd"  , "Fne")
 BUILTIN(__builtin_powf, "fff"  , "Fne")
+BUILTIN(__builtin_powf16, "hhh"  , "Fne")
 BUILTIN(__builtin_powl, "LdLdLd", "Fne")
 
 // Standard unary libc/libm functions with double/float/long double variants:
@@ -180,9 +184,11 @@ BUILTIN(__builtin_cbrtf, "ff", "Fnc")
 BUILTIN(__builtin_cbrtl, "LdLd", "Fnc")
 BUILTIN(__builtin_ceil , "dd"  , "Fnc")
 BUILTIN(__builtin_ceilf, "ff"  , "Fnc")
+BUILTIN(__builtin_ceilf16, "hh"  , "Fnc")
 BUILTIN(__builtin_ceill, "LdLd", "Fnc")
 BUILTIN(__builtin_cos , "dd"  , "Fne")
 BUILTIN(__builtin_cosf, "ff"  , "Fne")
+BUILTIN(__builtin_cosf16, "hh"  , "Fne")
 BUILTIN(__builtin_cosh , "dd"  , "Fne")
 BUILTIN(__builtin_coshf, "ff"  , "Fne")
 BUILTIN(__builtin_coshl, "LdLd", "Fne")
@@ -195,9 +201,11 @@ BUILTIN(__builtin_erfcf, "ff", "Fne")
 BUILTIN(__builtin_erfcl, "LdLd", "Fne")
 BUILTIN(__builtin_exp , "dd"  , "Fne")
 BUILTIN(__builtin_expf, "ff"  , "Fne")
+BUILTIN(__builtin_expf16, "hh"  , "Fne")
 BUILTIN(__builtin_expl, "LdLd", "Fne")
 BUILTIN(__builtin_exp2 , "dd"  , "Fne")
 BUILTIN(__builtin_exp2f, "ff"  , "Fne")
+BUILTIN(__builtin_exp2f16, "hh"  , "Fne")
 BUILTIN(__builtin_exp2l, "LdLd", "Fne")
 BUILTIN(__builtin_expm1 , "dd", "Fne")
 BUILTIN(__builtin_expm1f, "ff", "Fne")
@@ -207,15 +215,19 @@ BUILTIN(__builtin_fdimf, "fff", "Fne")
 BUILTIN(__builtin_fdiml, "LdLdLd", "Fne")
 BUILTIN(__builtin_floor , "dd"  , "Fnc")
 BUILTIN(__builtin_floorf, "ff"  , "Fnc")
+BUILTIN(__builtin_floorf16, "hh"  , "Fnc")
 BUILTIN(__builtin_floorl, "LdLd", "Fnc")
 BUILTIN(__builtin_fma, "dddd", "Fne")
 BUILTIN(__builtin_fmaf, "ffff", "Fne")
+BUILTIN(__builtin_fmaf16, "hhhh", "Fne")
 BUILTIN(__builtin_fmal, "LdLdLdLd", "Fne")
 BUILTIN(__builtin_fmax, "ddd", "Fnc")
 BUILTIN(__builtin_fmaxf, "fff", "Fnc")
+BUILTIN(__builtin_fmaxf16, "hhh", "Fnc")
 BUILTIN(__builtin_fmaxl, "LdLdLd", "Fnc")
 BUILTIN(__builtin_fmin, "ddd", "Fnc")
 BUILTIN(__builtin_fminf, "fff", "Fnc")
+BUILTIN(__builtin_fminf16, "hhh", "Fnc")
 BUILTIN(__builtin_fminl, "LdLdLd", "Fnc")
 BUILTIN(__builtin_hypot , "ddd"  , "Fne")
 BUILTIN(__builtin_hypotf, "fff"  , "Fne")
@@ -235,17 +247,20 @@ BUILTIN(__builtin_llroundl, "LLiLd", "Fn
 BUILTIN(__builtin_log , "dd"  , "Fne")
 BUILTIN(__builtin_log10 , "dd"  , "Fne")
 BUILTIN(__builtin_log10f, "ff"  , "Fne")
+BUILTIN(__builtin_log10f16, "hh"  , "Fne")
 BUILTIN(__builtin_log10l, "LdLd", "Fne")
 BUILTIN(__builtin_log1p , "dd"  , "Fne")
 BUILTIN(__builtin_log1pf, "ff"  , "Fne")
 BUILTIN(__builtin_log1pl, "LdLd", "Fne")
 BUILTIN(__builtin_log2, "dd"  , "Fne")
 BUILTIN(__builtin_log2f, "ff"  , "Fne")
+BUILTIN(__builtin_log2f16, "hh"  , "Fne")
 BUILTIN(__builtin_log2l, "LdLd"  , "Fne")
 BUILTIN(__builtin_logb , "dd", "Fne")
 BUILTIN(__builtin_logbf, "ff", "Fne")
 BUILTIN(__builtin_logbl, "LdLd", "Fne")
 BUILTIN(__builtin_logf, "ff"  , "Fne")
+BUILTIN(__builtin_logf16, "hh"  , "Fne")
 BUILTIN(__builtin_logl, "LdLd", "Fne")
 BUILTIN(__builtin_lrint , "Lid", "Fne")
 BUILTIN(__builtin_lrintf, "Lif", "Fne")
@@ -270,9 +285,11 @@ BUILTIN(__builtin_remquof, "fffi*", "Fn"
 BUILTIN(__builtin_remquol, "LdLdLdi*", "Fn")

r363986 - AMDGPU: Add DS GWS sema builtins

2019-06-20 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Thu Jun 20 14:33:57 2019
New Revision: 363986

URL: http://llvm.org/viewvc/llvm-project?rev=363986&view=rev
Log:
AMDGPU: Add DS GWS sema builtins

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363986&r1=363985&r2=363986&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Jun 20 14:33:57 2019
@@ -47,6 +47,9 @@ BUILTIN(__builtin_amdgcn_s_dcache_inv, "
 BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n")
 BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
+BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
 
 // FIXME: Need to disallow constant address space.
 BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
@@ -108,6 +111,7 @@ BUILTIN(__builtin_amdgcn_ds_consume, "ii
 
//===--===//
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts")
 TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", 
"ci-insts")
 
 
//===--===//
 // Interpolation builtins.

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=363986&r1=363985&r2=363986&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Thu Jun 20 14:33:57 2019
@@ -3,6 +3,8 @@
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S 
-emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S 
-emit-llvm -o - %s | FileCheck %s
 
+typedef unsigned int uint;
+
 // CHECK-LABEL: @test_s_dcache_inv_vol
 // CHECK: call void @llvm.amdgcn.s.dcache.inv.vol(
 void test_s_dcache_inv_vol()
@@ -17,3 +19,9 @@ void test_buffer_wbinvl1_vol()
   __builtin_amdgcn_buffer_wbinvl1_vol();
 }
 
+// CHECK-LABEL: @test_gws_sema_release_all(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %id)
+void test_gws_sema_release_all(uint id)
+{
+  __builtin_amdgcn_ds_gws_sema_release_all(id);
+}

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363986&r1=363985&r2=363986&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Thu Jun 20 14:33:57 2019
@@ -560,6 +560,24 @@ kernel void test_gws_barrier(uint value,
   __builtin_amdgcn_ds_gws_barrier(value, id);
 }
 
+// CHECK-LABEL: @test_gws_sema_v(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.v(i32 %id)
+kernel void test_gws_sema_v(uint id) {
+  __builtin_amdgcn_ds_gws_sema_v(id);
+}
+
+// CHECK-LABEL: @test_gws_sema_br(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id)
+kernel void test_gws_sema_br(uint value, uint id) {
+  __builtin_amdgcn_ds_gws_sema_br(value, id);
+}
+
+// CHECK-LABEL: @test_gws_sema_p(
+// CHECK: call void @llvm.amdgcn.ds.gws.sema.p(i32 %id)
+kernel void test_gws_sema_p(uint id) {
+  __builtin_amdgcn_ds_gws_sema_p(id);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }

Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl?rev=363986&r1=363985&r2=363986&view=diff
==
--- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl (original)
+++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Thu Jun 20 14:33:57 
2019
@@ -1,8 +1,9 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
 
-void test_ci_biltins()
+void test_ci_builtins()
 {
   __builtin_amdgcn_s_dcache_inv_vol(); // expected-error 
{{'__builtin_amdgcn_s_dcache_inv_vol' needs target feature ci-insts}}
   __builtin_amdgcn_buffer_wbinvl1_vol(); // expected-error 
{{'__builtin_am

r364123 - AMDGPU: Fix target builtins for gfx10

2019-06-21 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Fri Jun 21 18:30:00 2019
New Revision: 364123

URL: http://llvm.org/viewvc/llvm-project?rev=364123&view=rev
Log:
AMDGPU: Fix target builtins for gfx10

This wasn't setting some of the features from older generations.

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=364123&r1=364122&r2=364123&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Fri Jun 21 18:30:00 2019
@@ -144,8 +144,10 @@ bool AMDGPUTargetInfo::initFeatureMap(
   LLVM_FALLTHROUGH;
 case GK_GFX1010:
   Features["dl-insts"] = true;
+  Features["ci-insts"] = true;
   Features["16-bit-insts"] = true;
   Features["dpp"] = true;
+  Features["gfx8-insts"] = true;
   Features["gfx9-insts"] = true;
   Features["gfx10-insts"] = true;
   Features["s-memrealtime"] = true;

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=364123&r1=364122&r2=364123&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Fri Jun 21 18:30:00 2019
@@ -15,9 +15,9 @@
 
 // GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
 // GFX906: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
-// GFX1010: 
"target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime"
-// GFX1011: 
"target-features"="+16-bit-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime"
-// GFX1012: 
"target-features"="+16-bit-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime"
+// GFX1010: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1011: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1012: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
 // GFX801: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
 // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals"
 // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=364123&r1=364122&r2=364123&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Fri Jun 21 18:30:00 2019
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S 
-emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S 
-emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S 
-emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S 
-emit-llvm -o - %s | FileCheck %s
 
 typedef unsigned int uint;
 

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl?rev=364123&r1=364122&r2=364123&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl 
(original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl Fri Jun 
21 18:30:00 2019
@@ -1,6 +1,7 @

r364251 - AMDGPU: Fix missing declaration for mbcnt builtins

2019-06-24 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Jun 24 16:34:06 2019
New Revision: 364251

URL: http://llvm.org/viewvc/llvm-project?rev=364251&view=rev
Log:
AMDGPU: Fix missing declaration for mbcnt builtins

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=364251&r1=364250&r2=364251&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Jun 24 16:34:06 2019
@@ -33,6 +33,9 @@ BUILTIN(__builtin_amdgcn_workitem_id_x,
 BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc")
 
+BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
+
 
//===--===//
 // Instruction builtins.
 
//===--===//

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=364251&r1=364250&r2=364251&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Jun 24 16:34:06 2019
@@ -578,6 +578,18 @@ kernel void test_gws_sema_p(uint id) {
   __builtin_amdgcn_ds_gws_sema_p(id);
 }
 
+// CHECK-LABEL: @test_mbcnt_lo(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.lo(i32 %src0, i32 %src1)
+kernel void test_mbcnt_lo(global uint* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_mbcnt_lo(src0, src1);
+}
+
+// CHECK-LABEL: @test_mbcnt_hi(
+// CHECK: call i32 @llvm.amdgcn.mbcnt.hi(i32 %src0, i32 %src1)
+kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_mbcnt_hi(src0, src1);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r331216 - AMDGPU: Add Vega12 and Vega20

2018-04-30 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Apr 30 12:08:27 2018
New Revision: 331216

URL: http://llvm.org/viewvc/llvm-project?rev=331216&view=rev
Log:
AMDGPU: Add Vega12 and Vega20

Changes by
  Matt Arsenault
  Konstantin Zhuravlyov

Added:
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/Driver/amdgpu-macros.cl
cfe/trunk/test/Driver/amdgpu-mcpu.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=331216&r1=331215&r2=331216&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Apr 30 12:08:27 2018
@@ -121,6 +121,18 @@ TARGET_BUILTIN(__builtin_amdgcn_mov_dpp,
 TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
 
 
//===--===//
+// Deep learning builtins.
+//===--===//
+
+TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hf", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSi", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUi", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSi", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUi", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSi", "nc", "dl-insts")
+TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUi", "nc", "dl-insts")
+
+//===--===//
 // Special builtins.
 
//===--===//
 BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=331216&r1=331215&r2=331216&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Apr 30 12:08:27 2018
@@ -133,6 +133,10 @@ bool AMDGPUTargetInfo::initFeatureMap(
   CPU = "gfx600";
 
 switch (parseAMDGCNName(CPU).Kind) {
+case GK_GFX906:
+  Features["dl-insts"] = true;
+  LLVM_FALLTHROUGH;
+case GK_GFX904:
 case GK_GFX902:
 case GK_GFX900:
   Features["gfx9-insts"] = true;

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=331216&r1=331215&r2=331216&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Mon Apr 30 12:08:27 2018
@@ -78,9 +78,11 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 GK_GFX810,
 GK_GFX900,
 GK_GFX902,
+GK_GFX904,
+GK_GFX906,
 
 GK_AMDGCN_FIRST = GK_GFX600,
-GK_AMDGCN_LAST = GK_GFX902,
+GK_AMDGCN_LAST = GK_GFX906,
   };
 
   struct GPUInfo {
@@ -127,7 +129,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 {{"cayman"},  {"cayman"},  GK_CAYMAN,  true,  false, false, false, false},
 {{"turks"},   {"turks"},   GK_TURKS,   false, false, false, false, false},
   };
-  static constexpr GPUInfo AMDGCNGPUs[30] = {
+  static constexpr GPUInfo AMDGCNGPUs[32] = {
   // Name   CanonicalKindHas   HasHasHas   Has
   //Name FMAF  Fast   LDEXPF FP64  Fast
   //   FMAFFMA
@@ -161,6 +163,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg
 {{"stoney"},{"gfx810"},  GK_GFX810,  true, false, true,  true, true},
 {{"gfx900"},{"gfx900"},  GK_GFX900,  true, true,  true,  true, true},
 {{"gfx902"},{"gfx902"},  GK_GFX900,  true, true,  true,  true, true},
+{{"gfx904"},{"gfx904"},  GK_GFX904,  true, true,  true,  true, true},
+{{"gfx906"},{"gfx906"},  GK_GFX906,  true, true,  true,  true, true},
   };
 
   static GPUInfo parseR600Name(StringRef Name);

Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=331216&view=auto
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Mon Apr 30 12:08:27 2018
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+
+// Check that appropriate features are defined for every support

r336676 - Update test for backend error message change

2018-07-10 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jul 10 07:03:50 2018
New Revision: 336676

URL: http://llvm.org/viewvc/llvm-project?rev=336676&view=rev
Log:
Update test for backend error message change

Modified:
cfe/trunk/test/CodeGen/backend-unsupported-error.ll

Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/backend-unsupported-error.ll?rev=336676&r1=336675&r2=336676&view=diff
==
--- cfe/trunk/test/CodeGen/backend-unsupported-error.ll (original)
+++ cfe/trunk/test/CodeGen/backend-unsupported-error.ll Tue Jul 10 07:03:50 2018
@@ -3,7 +3,7 @@
 
 ; This is to check that backend errors for unsupported features are formatted 
correctly
 
-; CHECK: error: test.c:2:20: in function bar i32 (): unsupported call to 
function foo.2
+; CHECK: error: test.c:2:20: in function bar i32 (): unsupported call to 
function foo
 
 target triple = "r600-unknown-unknown"
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r336681 - AMDGPU: Try to fix test again

2018-07-10 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Tue Jul 10 07:47:31 2018
New Revision: 336681

URL: http://llvm.org/viewvc/llvm-project?rev=336681&view=rev
Log:
AMDGPU: Try to fix test again

Modified:
cfe/trunk/test/CodeGen/backend-unsupported-error.ll

Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/backend-unsupported-error.ll?rev=336681&r1=336680&r2=336681&view=diff
==
--- cfe/trunk/test/CodeGen/backend-unsupported-error.ll (original)
+++ cfe/trunk/test/CodeGen/backend-unsupported-error.ll Tue Jul 10 07:47:31 2018
@@ -21,7 +21,7 @@ entry:
   ret i32 %call, !dbg !15
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind noinline "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 40ab8ae - OpenMP: Add helper function for convergent runtime calls

2019-10-27 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2019-10-27T21:26:55-07:00
New Revision: 40ab8ae9fb70f1550815bf0f867148b5101a4f66

URL: 
https://github.com/llvm/llvm-project/commit/40ab8ae9fb70f1550815bf0f867148b5101a4f66
DIFF: 
https://github.com/llvm/llvm-project/commit/40ab8ae9fb70f1550815bf0f867148b5101a4f66.diff

LOG: OpenMP: Add helper function for convergent runtime calls

Most of the functions emitted here should probably be convergent, but
only barriers are currently marked. Introduce this helper before
adding convergent to more functions.

Added: 


Modified: 
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h

Removed: 




diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 708260429f68..910992e76d0a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1799,9 +1799,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned 
Function) {
 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
 auto *FnTy =
 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
-cast<llvm::Function>(RTLFn.getCallee())
-->addFnAttr(llvm::Attribute::Convergent);
+RTLFn =
+CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
 break;
   }
   case OMPRTL__kmpc_barrier_simple_spmd: {
@@ -1810,10 +1809,8 @@ 
CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
 auto *FnTy =
 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-RTLFn =
-CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
-cast<llvm::Function>(RTLFn.getCallee())
-->addFnAttr(llvm::Attribute::Convergent);
+RTLFn = CGM.CreateConvergentRuntimeFunction(
+FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
 break;
   }
   case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index b05a58848e82..75708d6e4966 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3332,8 +3332,14 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) {
 /// type and name.
 llvm::FunctionCallee
 CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
- llvm::AttributeList ExtraAttrs,
- bool Local) {
+ llvm::AttributeList ExtraAttrs, bool 
Local,
+ bool AssumeConvergent) {
+  if (AssumeConvergent) {
+ExtraAttrs =
+ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex,
+llvm::Attribute::Convergent);
+  }
+
   llvm::Constant *C =
   GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
   /*DontDefer=*/false, /*IsThunk=*/false,

diff  --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index 73f81adae35f..f5014c05b067 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1027,11 +1027,22 @@ class CodeGenModule : public CodeGenTypeCache {
   }
 
   /// Create or return a runtime function declaration with the specified type
-  /// and name.
+  /// and name. If \p AssumeConvergent is true, the call will have the
+  /// convergent attribute added.
   llvm::FunctionCallee
   CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name,
 llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
-bool Local = false);
+bool Local = false, bool AssumeConvergent = false);
+
+  /// Create or return a runtime function declaration with the specified type
+  /// and name. This will automatically add the convergent attribute to the
+  /// function declaration.
+  llvm::FunctionCallee CreateConvergentRuntimeFunction(
+  llvm::FunctionType *Ty, StringRef Name,
+  llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
+  bool Local = false) {
+return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true);
+  }
 
   /// Create a new runtime global variable with the specified type and name.
   llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 56a503b - OpenMP: Add convergent to more runtime functions

2019-10-27 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2019-10-27T21:26:55-07:00
New Revision: 56a503bdba9c33fce4d8fe86494cfd9c0b62c88a

URL: 
https://github.com/llvm/llvm-project/commit/56a503bdba9c33fce4d8fe86494cfd9c0b62c88a
DIFF: 
https://github.com/llvm/llvm-project/commit/56a503bdba9c33fce4d8fe86494cfd9c0b62c88a.diff

LOG: OpenMP: Add convergent to more runtime functions

Several of these other functions are probably also convergent, but
these two seem obviously convergent.

Added: 


Modified: 
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
clang/test/OpenMP/nvptx_parallel_codegen.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 910992e76d0a..b7808e0c4f69 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1817,14 +1817,14 @@ 
CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
 // Build int32_t __kmpc_warp_active_thread_mask(void);
 auto *FnTy =
 llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
-RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, 
"__kmpc_warp_active_thread_mask");
 break;
   }
   case OMPRTL_NVPTX__kmpc_syncwarp: {
 // Build void __kmpc_syncwarp(kmp_int32 Mask);
 auto *FnTy =
 llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
-RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp");
 break;
   }
   }

diff  --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp 
b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index 32061bf7386c..2fc06350c380 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -88,7 +88,7 @@ int bar(int n){
 // CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
 //
 // CHECK: [[AWAIT_WORK]]
-// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 
#[[#BARRIER_ATTRS:]]
+// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 
#[[#CONVERGENT:]]
 // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]
 // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@@ -318,10 +318,10 @@ int bar(int n){
 // CHECK: define internal void [[PARALLEL_FN4]](
 // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
 // CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
-// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) 
#[[#BARRIER_ATTRS]]
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) 
#[[#CONVERGENT:]]
 // CHECK: ret void
 
-// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) 
#[[#BARRIER_ATTRS]]
+// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]]
 
 // CHECK-LABEL: define {{.*}}void 
{{@__omp_offloading_.+template.+l55}}_worker()
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}(
@@ -343,7 +343,7 @@ int bar(int n){
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* 
noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,
-// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
+// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask(){{$}}
 // CHECK:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK:  [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK:  store i32 0, i32* [[CC]],
@@ -363,11 +363,15 @@ int bar(int n){
 // CHECK:  store i32
 // CHECK:  call void @__kmpc_end_critical(
 
-// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]])
+// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]]){{$}}
 // CHECK:  [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
 // CHECK:  store i32 [[NEW_CC_VAL]], i32* [[CC]],
 // CHECK:  br label
 
-// CHECK: attributes #[[#BARRIER_ATTRS]] = {{.*}} convergent {{.*}}
+
+// CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]]
+// CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]]
+
+// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}}
 
 #endif



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 1d96dca - HIP: Try to deal with more llvm package layouts

2020-05-25 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-23T13:28:24-04:00
New Revision: 1d96dca9491e3d75c11c3cd1acff5fcda8c2f613

URL: 
https://github.com/llvm/llvm-project/commit/1d96dca9491e3d75c11c3cd1acff5fcda8c2f613
DIFF: 
https://github.com/llvm/llvm-project/commit/1d96dca9491e3d75c11c3cd1acff5fcda8c2f613.diff

LOG: HIP: Try to deal with more llvm package layouts

The various HIP builds are all inconsistent.

The default llvm install goes to ${INSTALL_PREFIX}/bin/clang, but the
rocm packaging scripts move this under
${INSTALL_PREFIX}/llvm/bin/clang. Some other builds further pollute
this with ${INSTALL_PREFIX}/bin/x86_64/clang. These should really be
consolidated, but try to handle them for now.

Added: 


Modified: 
clang/lib/Driver/ToolChains/AMDGPU.cpp

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 193ccad98f52..3e51bd00bae4 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -107,11 +107,18 @@ RocmInstallationDetector::RocmInstallationDetector(
 // the Windows-esque layout the ROCm packages use with the host 
architecture
 // subdirectory of bin.
 
+// Strip off directory (usually bin)
 StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
-if (ParentDir == HostTriple.getArchName())
+StringRef ParentName = llvm::sys::path::filename(ParentDir);
+
+// Some builds use bin/{host arch}, so go up again.
+if (ParentName == "bin") {
   ParentDir = llvm::sys::path::parent_path(ParentDir);
+  ParentName = llvm::sys::path::filename(ParentDir);
+}
 
-if (ParentDir == "bin") {
+if (ParentName == "llvm") {
+  // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
   Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(),
   /*StrictChecking=*/true);
 }



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libclc] cf4d4e3 - libclc: Compile with -nostdlib

2020-05-28 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-28T10:41:31-04:00
New Revision: cf4d4e366a2165f0e93948f166d76ae650aecc98

URL: 
https://github.com/llvm/llvm-project/commit/cf4d4e366a2165f0e93948f166d76ae650aecc98
DIFF: 
https://github.com/llvm/llvm-project/commit/cf4d4e366a2165f0e93948f166d76ae650aecc98.diff

LOG: libclc: Compile with -nostdlib

This fixes a build error when compiling for amdgcn-amd-amdhsa, which
defaults to trying to link bitcode libraries.

Added: 


Modified: 
libclc/CMakeLists.txt

Removed: 




diff  --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 7b981110f6fd..9472f191fbde 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -262,7 +262,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
target_compile_definitions( builtins.link.${arch_suffix} PRIVATE
"__CLC_INTERNAL" )
target_compile_options( builtins.link.${arch_suffix} PRIVATE  
-target
-   ${t} ${mcpu} -fno-builtin )
+   ${t} ${mcpu} -fno-builtin -nostdlib )
set_target_properties( builtins.link.${arch_suffix} PROPERTIES
LINKER_LANGUAGE CLC )
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 97f3f0b - AMDGPU: Add intrinsic for s_setreg

2020-05-28 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-28T14:26:38-04:00
New Revision: 97f3f0bab0982f84745c7ac5ce8fb6b0918ff718

URL: 
https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718
DIFF: 
https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718.diff

LOG: AMDGPU: Add intrinsic for s_setreg

This will be more useful with fenv access implemented.

Added: 
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll

Modified: 
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SOPInstructions.td

Removed: 




diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 5633ccd5d744..28379142b05a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
 // Instruction builtins.
 
//===--===//
 BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
+BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
 BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
 BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8f2f149103b3..3563ad464c66 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong 
src0, uint src1, uint4 sr
   *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
 }
 
+// CHECK-LABEL: test_s_setreg(
+// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
+kernel void test_s_setreg(uint val) {
+  __builtin_amdgcn_s_setreg(8193, val);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index ad5e8776b2e8..dbe2900b600b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -139,3 +139,8 @@ void test_fence() {
   const char ptr[] = "workgroup";
   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error 
{{expression is not a string literal}}
 }
+
+void test_s_setreg(int x, int y) {
+  __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to 
'__builtin_amdgcn_s_setreg' must be a constant integer}}
+  __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to 
'__builtin_amdgcn_s_setreg' must be a constant integer}}
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index e2d8f3cb1bd6..40449304ed04 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1207,6 +1207,16 @@ def int_amdgcn_s_getreg :
   [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<ArgIndex<0>>]
 >;
 
+// Note this can be used to set FP environment properties that are
+// unsafe to change in non-strictfp functions. The register properties
+// available (and value required to access them) may differ per
+// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
+def int_amdgcn_s_setreg :
+  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
+>;
+
 // int_amdgcn_s_getpc is provided to allow a specific style of position
 // independent code to determine the high part of its address when it is
 // known (through convention) that the code and any data of interest does

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 3b8f88271458..59f9866b93b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [// setcc
 
 def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
 
-def AMDGPUSetRegOp :  SDTypeProfile<0, 2, [
-  SDTCisInt<0>, SDTCisInt<1>
-]>;
-
-def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
-  SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
-
 def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 

diff  --git a/llvm/lib/Target/AMDGP

[clang] 301a6da - AMDGPU: Fix clang side null pointer value for private

2020-06-02 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-06-02T09:23:46-04:00
New Revision: 301a6da8c24a09052e3bda10e90b450b7b39ffea

URL: 
https://github.com/llvm/llvm-project/commit/301a6da8c24a09052e3bda10e90b450b7b39ffea
DIFF: 
https://github.com/llvm/llvm-project/commit/301a6da8c24a09052e3bda10e90b450b7b39ffea.diff

LOG: AMDGPU: Fix clang side null pointer value for private

The change to fold_priv_arith looks strange to me, but this was
already the untested behavior for local.

Added: 


Modified: 
clang/lib/Basic/Targets/AMDGPU.h
clang/test/CodeGenOpenCL/amdgpu-nullptr.cl

Removed: 




diff  --git a/clang/lib/Basic/Targets/AMDGPU.h 
b/clang/lib/Basic/Targets/AMDGPU.h
index 6c9060aa3f7b..e4194a881e3f 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -355,7 +355,9 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
   // address space has value 0 but in private and local address space has
   // value ~0.
   uint64_t getNullPointerValue(LangAS AS) const override {
-return AS == LangAS::opencl_local ? ~0 : 0;
+// FIXME: Also should handle region.
+return (AS == LangAS::opencl_local || AS == LangAS::opencl_private)
+  ? ~0 : 0;
   }
 
   void setAuxTarget(const TargetInfo *Aux) override;

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl 
b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
index ba34d168bf79..753f7f6f4406 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -19,7 +19,7 @@ typedef struct {
 
 // Test 0 as initializer.
 
-// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* 
null, align 4
+// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* 
addrspacecast (i8* null to i8 addrspace(5)*), align 4
 private char *private_p = 0;
 
 // CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* 
addrspacecast (i8* null to i8 addrspace(3)*), align 4
@@ -36,7 +36,7 @@ generic char *generic_p = 0;
 
 // Test NULL as initializer.
 
-// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 
addrspace(5)* null, align 4
+// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 
addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), align 4
 private char *private_p_NULL = NULL;
 
 // CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 
addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
@@ -57,25 +57,28 @@ generic char *generic_p_NULL = NULL;
 // CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32* null, 
align 8
 generic int *fold_generic = (global int*)(generic float*)(private char*)0;
 
-// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 
addrspace(5)* null, align 4
+// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 
addrspace(5)* addrspacecast (i16* null to i16 addrspace(5)*), align 4
 private short *fold_priv = (private short*)(generic int*)(global void*)0;
 
-// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 
addrspace(5)* inttoptr (i32 10 to i8 addrspace(5)*), align 4
+// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 
addrspace(5)* inttoptr (i32 9 to i8 addrspace(5)*), align 4
 private char *fold_priv_arith = (private char*)0 + 10;
 
-// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 14, align 4
+// CHECK: @fold_local_arith = local_unnamed_addr addrspace(1) global i8 
addrspace(3)* inttoptr (i32 9 to i8 addrspace(3)*), align 4
+local char *fold_local_arith = (local char*)0 + 10;
+
+// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 13, align 4
 int fold_int = (int)(private void*)(generic char*)(global int*)0 + 14;
 
-// CHECK: @fold_int2 = local_unnamed_addr addrspace(1) global i32 13, align 4
+// CHECK: @fold_int2 = local_unnamed_addr addrspace(1) global i32 12, align 4
 int fold_int2 = (int) ((private void*)0 + 13);
 
-// CHECK: @fold_int3 = local_unnamed_addr addrspace(1) global i32 0, align 4
+// CHECK: @fold_int3 = local_unnamed_addr addrspace(1) global i32 -1, align 4
 int fold_int3 = (int) ((private int*)0);
 
-// CHECK: @fold_int4 = local_unnamed_addr addrspace(1) global i32 8, align 4
+// CHECK: @fold_int4 = local_unnamed_addr addrspace(1) global i32 7, align 4
 int fold_int4 = (int) &((private int*)0)[2];
 
-// CHECK: @fold_int5 = local_unnamed_addr addrspace(1) global i32 4, align 4
+// CHECK: @fold_int5 = local_unnamed_addr addrspace(1) global i32 3, align 4
 int fold_int5 = (int) &((private StructTy1*)0)->p2;
 
 
@@ -97,12 +100,12 @@ int fold_int5_local = (int) &((local StructTy1*)0)->p2;
 
 // Test static variable initialization.
 
-// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8 
addrspace(5)* null, align 4
-// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8 
addrspace(5)* null, align 4
-/

[clang] 4593e41 - AMDGPU: Teach toolchain to link rocm device libs

2020-04-10 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-10T13:37:32-04:00
New Revision: 4593e4131affa84e61d7b6844be409ba46d29f11

URL: 
https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11
DIFF: 
https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11.diff

LOG: AMDGPU: Teach toolchain to link rocm device libs

Currently the library is linked separately, but that is not sufficient to
implement fast math flags correctly. Each module should get the
version of the library appropriate for its combination of fast math
and related flags, with the attributes propagated into its functions
and internalized.

HIP already maintains the list of libraries, but this is not used for
OpenCL. Unfortunately, HIP uses a separate --hip-device-lib argument,
despite both languages using the same bitcode library. Eventually
these two searches need to be merged.

An additional problem is there are 3 different locations the libraries
are installed, depending on which build is used. This also needs to be
consolidated (or at least the search logic needs to deal with this
unnecessary complexity).

Added: 
clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc
clang/test/Driver/rocm-detect.cl
clang/test/Driver/rocm-device-libs.cl
clang/test/Driver/rocm-not-found.cl

Modified: 
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/HIP.cpp
clang/lib/Driver/ToolChains/HIP.h
clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
clang/test/Driver/amdgpu-visibility.cl
llvm/include/llvm/Support/TargetParser.h
llvm/lib/Support/TargetParser.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index cba59cb3b66d..b28ee88f3d87 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -55,6 +55,14 @@ def err_drv_no_cuda_installation : Error<
 def err_drv_no_cuda_libdevice : Error<
   "cannot find libdevice for %0. Provide path to 
different CUDA installation "
   "via --cuda-path, or pass -nocudalib to build without linking with 
libdevice.">;
+
+def err_drv_no_rocm_installation : Error<
+  "cannot find ROCm installation.  Provide its path via --rocm-path, or pass "
+  "-nogpulib.">;
+def err_drv_no_rocm_device_lib : Error<
+  "cannot find device library for %0. Provide path to 
different ROCm installation "
+  "via --rocm-path, or pass -nogpulib to build without linking default 
libraries.">;
+
 def err_drv_cuda_version_unsupported : Error<
   "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
   "but installation at %3 is %4. Use --cuda-path to specify a 
different CUDA "

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 02875f68ebfe..661aad49a8ee 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -608,6 +608,8 @@ def : Flag<["-"], "fno-cuda-rdc">, Alias;
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address 
spaces.">;
 def fno_cuda_short

[clang] 1e93b3d - Disable test on windows

2020-04-10 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-10T18:48:18-04:00
New Revision: 1e93b3d8a75638c2268df58e44a8738845df3ed5

URL: 
https://github.com/llvm/llvm-project/commit/1e93b3d8a75638c2268df58e44a8738845df3ed5
DIFF: 
https://github.com/llvm/llvm-project/commit/1e93b3d8a75638c2268df58e44a8738845df3ed5.diff

LOG: Disable test on windows

Added: 


Modified: 
clang/test/Driver/rocm-device-libs.cl

Removed: 




diff  --git a/clang/test/Driver/rocm-device-libs.cl 
b/clang/test/Driver/rocm-device-libs.cl
index 83641d24d156..77e9782f2594 100644
--- a/clang/test/Driver/rocm-device-libs.cl
+++ b/clang/test/Driver/rocm-device-libs.cl
@@ -1,5 +1,6 @@
 // REQUIRES: clang-driver
 // REQUIRES: amdgpu-registered-target
+// REQUIRES: !system-windows
 
 // Test flush-denormals-to-zero enabled uses oclc_daz_opt_on
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] dc89a3e - HIP: Fix handling of denormal mode

2020-04-13 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-13T11:48:45-07:00
New Revision: dc89a3efb43feedec04facfa2206de011d2606e7

URL: 
https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7
DIFF: 
https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7.diff

LOG: HIP: Fix handling of denormal mode

I didn't realize HIP was a distinct offloading kind, so the subtarget
was looking for -march, which isn't correct for HIP. We also have the
possibility of different denormal defaults in the case of multiple
offload targets, so we need to thread the JobAction through the target
hook.

Added: 


Modified: 
clang/include/clang/Driver/ToolChain.h
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Driver/ToolChains/Cuda.cpp
clang/lib/Driver/ToolChains/Cuda.h
clang/lib/Driver/ToolChains/Linux.cpp
clang/lib/Driver/ToolChains/Linux.h
clang/lib/Driver/ToolChains/PS4CPU.h
clang/test/Driver/cuda-flush-denormals-to-zero.cu

Removed: 




diff  --git a/clang/include/clang/Driver/ToolChain.h 
b/clang/include/clang/Driver/ToolChain.h
index 66f22d538138..fb3cbd7f84c8 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -636,8 +636,7 @@ class ToolChain {
   /// environment for the given \p FPType if given. Otherwise, the default
   /// assumed mode for any floating point type.
   virtual llvm::DenormalMode getDefaultDenormalModeForType(
-  const llvm::opt::ArgList &DriverArgs,
-  Action::OffloadKind DeviceOffloadKind,
+  const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
   const llvm::fltSemantics *FPType = nullptr) const {
 return llvm::DenormalMode::getIEEE();
   }

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index f09578f4769e..2a796f28403f 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -273,18 +273,22 @@ bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
 }
 
 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
-const llvm::opt::ArgList &DriverArgs, Action::OffloadKind 
DeviceOffloadKind,
+const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
 const llvm::fltSemantics *FPType) const {
   // Denormals should always be enabled for f16 and f64.
   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
 return llvm::DenormalMode::getIEEE();
 
-  if (DeviceOffloadKind == Action::OFK_Cuda) {
+  if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
+  JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
+auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
 if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
 DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero,
-   false))
+   getDefaultDenormsAreZeroForTarget(Kind)))
   return llvm::DenormalMode::getPreserveSign();
+
+return llvm::DenormalMode::getIEEE();
   }
 
   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
@@ -294,7 +298,9 @@ llvm::DenormalMode 
AMDGPUToolChain::getDefaultDenormalModeForType(
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
  getDefaultDenormsAreZeroForTarget(Kind);
-  // Outputs are flushed to zero, preserving sign
+
+  // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
+  // also implicit treated as zero (DAZ).
   return DAZ ? llvm::DenormalMode::getPreserveSign() :
llvm::DenormalMode::getIEEE();
 }

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.h 
b/clang/lib/Driver/ToolChains/AMDGPU.h
index 87a16272d624..afd71e1f595b 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -214,8 +214,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public 
Generic_ELF {
   static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-  const llvm::opt::ArgList &DriverArgs,
-  Action::OffloadKind DeviceOffloadKind,
+  const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
   const llvm::fltSemantics *FPType = nullptr) const override;
 };
 

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 5f9b6d813416..415ef27eee0a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2510,7 +2510,7 @@ static void CollectArgsForIntegratedAssembler(Compilation 
&C,
 static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &

[clang] 3a61245 - clang/AMDGPU: Assume denormals are enabled for the default target.

2020-04-15 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-15T09:17:26-04:00
New Revision: 3a612450508b314b7a6f4db142d0c619031d760e

URL: 
https://github.com/llvm/llvm-project/commit/3a612450508b314b7a6f4db142d0c619031d760e
DIFF: 
https://github.com/llvm/llvm-project/commit/3a612450508b314b7a6f4db142d0c619031d760e.diff

LOG: clang/AMDGPU: Assume denormals are enabled for the default target.

Since the default logic was based on having fast denormal/fma
features, and the default target has no features, we assumed flushing
by default. This fixes incorrectly assuming flushing in builds for
"generic" IR libraries.

The handling for no specified --cuda-gpu-arch in HIP is kind of
broken. Somewhere else forces a default target of gfx803, which does
not enable denormal handling by default. We don't see this default
switching here, so you'll end up with a different denormal mode
depending on whether you explicitly requested gfx803, or used it by
default.

Added: 


Modified: 
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/test/Driver/cl-denorms-are-zero.cl
clang/test/Driver/cuda-flush-denormals-to-zero.cu

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 2a796f28403f..e8c0b824ace1 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -262,6 +262,11 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, 
StringRef BoundArch,
 
 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
 llvm::AMDGPU::GPUKind Kind) {
+
+  // Assume nothing without a specific target.
+  if (Kind == llvm::AMDGPU::GK_NONE)
+return false;
+
   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
 
   // Default to enabling f32 denormals by default on subtargets where fma is

diff  --git a/clang/test/Driver/cl-denorms-are-zero.cl 
b/clang/test/Driver/cl-denorms-are-zero.cl
index 7774c0d60da8..e3fd095e5831 100644
--- a/clang/test/Driver/cl-denorms-are-zero.cl
+++ b/clang/test/Driver/cl-denorms-are-zero.cl
@@ -1,20 +1,24 @@
 // Slow FMAF and slow f32 denormals
-// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=pitcairn %s 2>&1 | 
FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=pitcairn %s 2>&1 
| FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c 
-mcpu=pitcairn %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 
 // Fast FMAF, but slow f32 denormals
-// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=tahiti %s 2>&1 | FileCheck 
-check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=tahiti %s 2>&1 | 
FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c 
-mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 
 // Fast F32 denormals, but slow FMAF
-// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=fiji %s 2>&1 | FileCheck 
-check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=fiji %s 2>&1 | 
FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c 
-mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 
 // Fast F32 denormals and fast FMAF
-// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck 
-check-prefixes=AMDGCN,AMDGCN-DENORM %s
-// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c 
-mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=gfx900 %s 2>&1 | 
FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s
+// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -nogpulib 
-c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
+
+// Default target is artificial, but should assume a conservative default.
+// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c %s 2>&1 | FileCheck 
-check-prefixes=AMDGCN,AMDGCN-DENORM %s
+// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -nogpulib 
-c %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s
 
 // AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
 
 // This should be omitted and default to ieee
-// AMDGCN-DENORM-NOT: "-fdenormal-fp-math-f32"
+// AMDGCN-DENORM-NOT: denormal-fp-math

diff  --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu 
b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index a515b5f8ca07..4a7b88823771 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -22,6 +22,8 @@
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c 
-march=haswell --cuda-gpu-arch=gfx803 -nocudain

[clang] 9e03bde - AMDGPU: Add llvm.amdgcn.sqrt intrinsic

2020-06-26 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-06-26T15:07:07-04:00
New Revision: 9e03bdebc17a223416d682f64ef2046b8bf0fc98

URL: 
https://github.com/llvm/llvm-project/commit/9e03bdebc17a223416d682f64ef2046b8bf0fc98
DIFF: 
https://github.com/llvm/llvm-project/commit/9e03bdebc17a223416d682f64ef2046b8bf0fc98.diff

LOG: AMDGPU: Add llvm.amdgcn.sqrt intrinsic

I spread the GlobalISel test into the regular one, which I've been
avoiding so far.

Added: 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll

Modified: 
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/VOP1Instructions.td

Removed: 




diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 9add10c64962..60be0525fabc 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -77,6 +77,8 @@ BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc")
 BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc")
 BUILTIN(__builtin_amdgcn_rcp, "dd", "nc")
 BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc")
+BUILTIN(__builtin_amdgcn_sqrt, "dd", "nc")
+BUILTIN(__builtin_amdgcn_sqrtf, "ff", "nc")
 BUILTIN(__builtin_amdgcn_rsq, "dd", "nc")
 BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc")
 BUILTIN(__builtin_amdgcn_rsq_clamp, "dd", "nc")
@@ -162,6 +164,7 @@ BUILTIN(__builtin_amdgcn_interp_mov, "fUiUiUiUi", "nc")
 
 TARGET_BUILTIN(__builtin_amdgcn_div_fixuph, "", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_rcph, "hh", "nc", "16-bit-insts")
+TARGET_BUILTIN(__builtin_amdgcn_sqrth, "hh", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_rsqh, "hh", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sinh, "hh", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2eef4f284271..b5c4841578c4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14702,6 +14702,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_rcpf:
   case AMDGPU::BI__builtin_amdgcn_rcph:
 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
+  case AMDGPU::BI__builtin_amdgcn_sqrt:
+  case AMDGPU::BI__builtin_amdgcn_sqrtf:
+  case AMDGPU::BI__builtin_amdgcn_sqrth:
+return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
   case AMDGPU::BI__builtin_amdgcn_rsq:
   case AMDGPU::BI__builtin_amdgcn_rsqf:
   case AMDGPU::BI__builtin_amdgcn_rsqh:

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index e3e6b81271d1..5884f84ab081 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -22,6 +22,13 @@ void test_rcp_f16(global half* out, half a)
   *out = __builtin_amdgcn_rcph(a);
 }
 
+// CHECK-LABEL: @test_sqrt_f16
+// CHECK: call half @llvm.amdgcn.sqrt.f16
+void test_sqrt_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sqrth(a);
+}
+
 // CHECK-LABEL: @test_rsq_f16
 // CHECK: call half @llvm.amdgcn.rsq.f16
 void test_rsq_f16(global half* out, half a)

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 3563ad464c66..56c83df6b6b4 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -116,6 +116,20 @@ void test_rcp_f64(global double* out, double a)
   *out = __builtin_amdgcn_rcp(a);
 }
 
+// CHECK-LABEL: @test_sqrt_f32
+// CHECK: call float @llvm.amdgcn.sqrt.f32
+void test_sqrt_f32(global float* out, float a)
+{
+  *out = __builtin_amdgcn_sqrtf(a);
+}
+
+// CHECK-LABEL: @test_sqrt_f64
+// CHECK: call double @llvm.amdgcn.sqrt.f64
+void test_sqrt_f64(global double* out, double a)
+{
+  *out = __builtin_amdgcn_sqrt(a);
+}
+
 // CHECK-LABEL: @test_rsq_f32
 // CHECK: call float @llvm.amdgcn.rsq.f32
 void test_rsq_f32(global float* out, float a)

diff  --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl
index 3487b1a5a803..fdb2f3f3c981 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl
@@ -8,6 +8,7 @@ void test_f16_tahiti(global half *out, half a, half b, half c)
 {
   *out = __builtin_amdgcn_div_fixuph(a, b, c); // expected-error 
{{'__builtin_amdgcn_div_fixuph' needs target feature 16-bit-insts}}
   *out = __builtin_amdgcn_rcph(a); // expect

[clang] 5c03bee - clang: Allow backend unsupported warnings

2020-04-27 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-27T12:14:51-04:00
New Revision: 5c03beefa720bddb3e3f53c595a76bce7ad50f37

URL: 
https://github.com/llvm/llvm-project/commit/5c03beefa720bddb3e3f53c595a76bce7ad50f37
DIFF: 
https://github.com/llvm/llvm-project/commit/5c03beefa720bddb3e3f53c595a76bce7ad50f37.diff

LOG: clang: Allow backend unsupported warnings

Currently this asserts on anything other than errors. In one
workaround scenario, AMDGPU emits DiagnosticInfoUnsupported as a
warning for functions that can't be correctly codegened, but should
never be executed.

Added: 
clang/test/CodeGenOpenCL/backend-unsupported-warning.ll

Modified: 
clang/include/clang/Basic/DiagnosticFrontendKinds.td
clang/lib/CodeGen/CodeGenAction.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td 
b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 87fdfc89c634..9df3e79d183f 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -61,6 +61,7 @@ def note_fe_backend_invalid_loc : Note<"could "
   "not determine the original source location for %0:%1:%2">, BackendInfo;
 
 def err_fe_backend_unsupported : Error<"%0">, BackendInfo;
+def warn_fe_backend_unsupported : Warning<"%0">, BackendInfo;
 
 def err_fe_invalid_code_complete_file : Error<
 "cannot locate code-completion file %0">, DefaultFatal;

diff  --git a/clang/lib/CodeGen/CodeGenAction.cpp 
b/clang/lib/CodeGen/CodeGenAction.cpp
index 81946b194495..b8ffe343db22 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -633,8 +633,9 @@ const FullSourceLoc 
BackendConsumer::getBestLocationFromDebugLoc(
 
 void BackendConsumer::UnsupportedDiagHandler(
 const llvm::DiagnosticInfoUnsupported &D) {
-  // We only support errors.
-  assert(D.getSeverity() == llvm::DS_Error);
+  // We only support warnings or errors.
+  assert(D.getSeverity() == llvm::DS_Error ||
+ D.getSeverity() == llvm::DS_Warning);
 
   StringRef Filename;
   unsigned Line, Column;
@@ -652,7 +653,11 @@ void BackendConsumer::UnsupportedDiagHandler(
 DiagnosticPrinterRawOStream DP(MsgStream);
 D.print(DP);
   }
-  Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str();
+
+  auto DiagType = D.getSeverity() == llvm::DS_Error
+  ? diag::err_fe_backend_unsupported
+  : diag::warn_fe_backend_unsupported;
+  Diags.Report(Loc, DiagType) << MsgStream.str();
 
   if (BadDebugInfo)
 // If we were not able to translate the file:line:col information

diff  --git a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll 
b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll
new file mode 100644
index ..82df1261c1ae
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll
@@ -0,0 +1,30 @@
+; RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S -o - %s 2>&1 | FileCheck %s
+
+; Check that a DiagnosticUnsupported reported as a warning works
+; correctly, and is not emitted as an error.
+
+; CHECK: warning: test.c:2:20: in function use_lds_global_in_func i32 (): 
local memory global used by non-kernel function
+
+target triple = "amdgcn-amd-amdhsa"
+
+@lds = external addrspace(3) global i32, align 4
+
+define i32 @use_lds_global_in_func() !dbg !5 {
+  %load = load i32, i32 addrspace(3)* @lds, !dbg !9
+  ret i32 %load, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang 
version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, 
enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: 
!6, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DILocation(line: 2, column: 20, scope: !5)
+!10 = !DILocation(line: 2, column: 13, scope: !5)



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 580a9f2 - Fix test without built AMDGPU

2020-04-27 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-04-27T13:32:00-04:00
New Revision: 580a9f2c307f8d475277f20999ad5820d171beba

URL: 
https://github.com/llvm/llvm-project/commit/580a9f2c307f8d475277f20999ad5820d171beba
DIFF: 
https://github.com/llvm/llvm-project/commit/580a9f2c307f8d475277f20999ad5820d171beba.diff

LOG: Fix test without built AMDGPU

Added: 


Modified: 
clang/test/CodeGenOpenCL/backend-unsupported-warning.ll

Removed: 




diff  --git a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll 
b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll
index 82df1261c1ae..597fe94885e3 100644
--- a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll
+++ b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll
@@ -1,3 +1,4 @@
+; REQUIRES: amdgpu-registered-target
 ; RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S -o - %s 2>&1 | FileCheck %s
 
 ; Check that a DiagnosticUnsupported reported as a warning works



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 30eeb74 - clang: Use byref for aggregate kernel arguments

2020-08-06 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-08-06T15:52:26-04:00
New Revision: 30eeb742f1d11d7a7036e3b8a3bffc1dfd252082

URL: 
https://github.com/llvm/llvm-project/commit/30eeb742f1d11d7a7036e3b8a3bffc1dfd252082
DIFF: 
https://github.com/llvm/llvm-project/commit/30eeb742f1d11d7a7036e3b8a3bffc1dfd252082.diff

LOG: clang: Use byref for aggregate kernel arguments

Add address space to indirect abi info and use it for kernels.

Previously, indirect arguments assumed a stack-passed object
in the alloca address space using byval. A stack pointer is unsuitable
for kernel arguments, which are passed in a separate, constant buffer
with a different address space.

Start using the new byref for aggregate kernel arguments. Previously
these were emitted as raw struct arguments, and turned into loads in
the backend. These will lower identically, although with byref you now
have the option of applying an explicit alignment. In the future, a
reasonable implementation would use byref for all kernel arguments
(this would be a practical problem at the moment due to losing things
like noalias on pointer arguments).

This is mostly to avoid fighting the optimizer's treatment of
aggregate load/store. SROA and instcombine both turn aggregate loads
and stores into a long sequence of element loads and stores, rather
than the optimizable memcpy I would expect in this situation. Now an
explicit memcpy will be introduced up-front which is better understood
and helps eliminate the alloca in more situations.

This skips using byref in the case where HIP kernel pointer arguments
in structs are promoted to global pointers. At minimum an additional
patch is needed to allow coercion with indirect arguments. This also
skips using it for OpenCL due to the current workaround used to
support kernels calling kernels. Distinct function bodies would need
to be generated up front instead of emitting an illegal call.

Added: 


Modified: 
clang/include/clang/CodeGen/CGFunctionInfo.h
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/kernel-args.cu
clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl

Removed: 




diff  --git a/clang/include/clang/CodeGen/CGFunctionInfo.h 
b/clang/include/clang/CodeGen/CGFunctionInfo.h
index eaf5a3d5aad7..253ef946ce15 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -44,10 +44,23 @@ class ABIArgInfo {
 /// but also emit a zero/sign extension attribute.
 Extend,
 
-/// Indirect - Pass the argument indirectly via a hidden pointer
-/// with the specified alignment (0 indicates default alignment).
+/// Indirect - Pass the argument indirectly via a hidden pointer with the
+/// specified alignment (0 indicates default alignment) and address space.
 Indirect,
 
+/// IndirectAliased - Similar to Indirect, but the pointer may be to an
+/// object that is otherwise referenced.  The object is known to not be
+/// modified through any other references for the duration of the call, and
+/// the callee must not itself modify the object.  Because C allows
+/// parameter variables to be modified and guarantees that they have unique
+/// addresses, the callee must defensively copy the object into a local
+/// variable if it might be modified or its address might be compared.
+/// Since those are uncommon, in principle this convention allows programs
+/// to avoid copies in more situations.  However, it may introduce *extra*
+/// copies if the callee fails to prove that a copy is unnecessary and the
+/// caller naturally produces an unaliased object for the argument.
+IndirectAliased,
+
 /// Ignore - Ignore the argument (treat as void). Useful for void and
 /// empty structs.
 Ignore,
@@ -86,6 +99,7 @@ class ABIArgInfo {
 unsigned AllocaFieldIndex; // isInAlloca()
   };
   Kind TheKind;
+  unsigned IndirectAddrSpace : 24; // isIndirect()
   bool PaddingInReg : 1;
   bool InAllocaSRet : 1;// isInAlloca()
   bool InAllocaIndirect : 1;// isInAlloca()
@@ -97,7 +111,8 @@ class ABIArgInfo {
   bool SignExt : 1; // isExtend()
 
   bool canHavePaddingType() const {
-return isDirect() || isExtend() || isIndirect() || isExpand();
+return isDirect() || isExtend() || isIndirect() || isIndirectAliased() ||
+   isExpand();
   }
   void setPaddingType(llvm::Type *T) {
 assert(canHavePaddingType());
@@ -112,9 +127,10 @@ class ABIArgInfo {
 public:
   ABIArgInfo(Kind K = Direct)
   : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
-PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
-IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
-InReg(false), CanBeFlattened(false), SignExt(false) {}
+IndirectAddrSpace(0), PaddingInReg(false), InAlloc

[clang] 684dc1b - Elaborate more on --rocm-path flag.

2020-05-05 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-05T09:33:18-04:00
New Revision: 684dc1bebe5cb70cfd27923940f9f8cba4f13195

URL: 
https://github.com/llvm/llvm-project/commit/684dc1bebe5cb70cfd27923940f9f8cba4f13195
DIFF: 
https://github.com/llvm/llvm-project/commit/684dc1bebe5cb70cfd27923940f9f8cba4f13195.diff

LOG: Elaborate more on --rocm-path flag.

I'm not sure what the conventions are for this documentation. The
format seems limiting. I don't see how to refer to other flags, or
mark flags as deprecated. The rst that I believe these generate seems to be
checked into the source tree, and out of date.

Added: 


Modified: 
clang/include/clang/Driver/Options.td

Removed: 




diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f9850c60f62d..66b98b1e46fa 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -609,9 +609,9 @@ def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, 
Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address 
spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
 def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
-  HelpText<"ROCm installation path">;
+  HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, 
Group,
-  HelpText<"HIP device library path">;
+  HelpText<"HIP device library path. Alternative to rocm-path.">;
 def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group,
   HelpText<"HIP device library">;
 def fhip_dump_offload_linker_script : Flag<["-"], 
"fhip-dump-offload-linker-script">,



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 03cb328 - clang: Cleanup usage of CreateMemCpy

2020-05-08 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-08T20:57:56-04:00
New Revision: 03cb328d6f691bde88c754341ff3859d1c1ecc2f

URL: 
https://github.com/llvm/llvm-project/commit/03cb328d6f691bde88c754341ff3859d1c1ecc2f
DIFF: 
https://github.com/llvm/llvm-project/commit/03cb328d6f691bde88c754341ff3859d1c1ecc2f.diff

LOG: clang: Cleanup usage of CreateMemCpy

It handles the pointee type casts in preparation for opaque
pointers.

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e336741d9111..32a9ba499ecb 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1262,11 +1262,9 @@ static llvm::Value *CreateCoercedLoad(Address Src, 
llvm::Type *Ty,
 
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
-  Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
-  Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty);
-  CGF.Builder.CreateMemCpy(Casted, SrcCasted,
-  llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
-  false);
+  CGF.Builder.CreateMemCpy(Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
+   Src.getPointer(), Src.getAlignment().getAsAlign(),
+   llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize));
   return CGF.Builder.CreateLoad(Tmp);
 }
 
@@ -1349,11 +1347,9 @@ static void CreateCoercedStore(llvm::Value *Src,
 // to that information.
 Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
 CGF.Builder.CreateStore(Src, Tmp);
-Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
-Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty);
-CGF.Builder.CreateMemCpy(DstCasted, Casted,
-llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
-false);
+CGF.Builder.CreateMemCpy(Dst.getPointer(), Dst.getAlignment().getAsAlign(),
+ Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
+ llvm::ConstantInt::get(CGF.IntPtrTy, DstSize));
   }
 }
 
@@ -2404,10 +2400,10 @@ void CodeGenFunction::EmitFunctionProlog(const 
CGFunctionInfo &FI,
   // FIXME: We should have a common utility for generating an aggregate
   // copy.
   CharUnits Size = getContext().getTypeSizeInChars(Ty);
-  auto SizeVal = llvm::ConstantInt::get(IntPtrTy, Size.getQuantity());
-  Address Dst = Builder.CreateBitCast(AlignedTemp, Int8PtrTy);
-  Address Src = Builder.CreateBitCast(ParamAddr, Int8PtrTy);
-  Builder.CreateMemCpy(Dst, Src, SizeVal, false);
+  Builder.CreateMemCpy(
+  AlignedTemp.getPointer(), 
AlignedTemp.getAlignment().getAsAlign(),
+  ParamAddr.getPointer(), ParamAddr.getAlignment().getAsAlign(),
+  llvm::ConstantInt::get(IntPtrTy, Size.getQuantity()));
   V = AlignedTemp;
 }
 ArgVals.push_back(ParamValue::forIndirect(V));



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] a881dc1 - Fix typo

2020-05-09 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-09T16:00:17-04:00
New Revision: a881dc1103579926f039e81c0d25626ff8a582a9

URL: 
https://github.com/llvm/llvm-project/commit/a881dc1103579926f039e81c0d25626ff8a582a9
DIFF: 
https://github.com/llvm/llvm-project/commit/a881dc1103579926f039e81c0d25626ff8a582a9.diff

LOG: Fix typo

Added: 


Modified: 
clang/lib/CodeGen/TargetInfo.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index 1e164d3fe2b0..bc5c1682853b 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -8353,7 +8353,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
 EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
   return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
 }
-// Arrary types.
+// Array types.
 if (auto ATy = dyn_cast(Ty)) {
   auto T = ATy->getElementType();
   auto NT = coerceKernelArgumentType(T, FromAS, ToAS);



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 123bee6 - AMDGPU: Search for new ROCm bitcode library structure

2020-05-12 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-12T09:41:07-04:00
New Revision: 123bee602a260150ff55c74287f583a67ee78f36

URL: 
https://github.com/llvm/llvm-project/commit/123bee602a260150ff55c74287f583a67ee78f36
DIFF: 
https://github.com/llvm/llvm-project/commit/123bee602a260150ff55c74287f583a67ee78f36.diff

LOG: AMDGPU: Search for new ROCm bitcode library structure

The current install situation is a mess, but I'm working on fixing
it. Search for the target layout instead of one of the N options that
exist today.

Added: 
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/hip.bc
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/ockl.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_daz_opt_off.bc
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_daz_opt_on.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_finite_only_off.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_finite_only_on.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1010.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1011.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1012.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_803.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_900.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_unsafe_math_off.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_unsafe_math_on.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_wavefrontsize64_off.bc

clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_wavefrontsize64_on.bc
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/ocml.bc
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/opencl.bc

Modified: 
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/test/Driver/rocm-device-libs.cl

Removed: 
clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc

clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc



diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index e8c0b824ace1..345a24b10018 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -65,30 +65,18 @@ RocmInstallationDetector::RocmInstallationDetector(
 if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
   continue;
 
-// FIXME: The install path situation is a real mess.
-
-// For a cmake install, these are placed directly in
-// ${INSTALL_PREFIX}/lib
-
-// In the separate OpenCL builds, the bitcode libraries are placed in
-// ${OPENCL_ROOT}/lib/x86_64/bitcode/*
-
-// For the rocm installed packages, these are placed at
-// /opt/rocm/opencl/lib/x86_64/bitcode
-
-// An additional copy is installed, in scattered locations between
-// /opt/rocm/hcc/rocdl/oclc
-// /opt/rocm/hcc/rocdl/ockl
-// /opt/rocm/hcc/rocdl/lib
+// The install path situation in old versions of ROCm is a real mess, and
+// use a different install layout. Multiple copies of the device libraries
+// exist for each frontend project, and differ depending on which build
+// system produced the pac

[clang] 14e1845 - HIP: Merge builtin library handling

2020-05-12 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-12T09:50:22-04:00
New Revision: 14e184571139ba4c7347ea547074c6d9ec9c7b14

URL: 
https://github.com/llvm/llvm-project/commit/14e184571139ba4c7347ea547074c6d9ec9c7b14
DIFF: 
https://github.com/llvm/llvm-project/commit/14e184571139ba4c7347ea547074c6d9ec9c7b14.diff

LOG: HIP: Merge builtin library handling

Merge with the new --rocm-path handling used for OpenCL. This looks
for a usable set of device libraries upfront, rather than giving a
generic "no such file or directory error". If any of the required
bitcode libraries are missing, this will now produce a "cannot find
ROCm installation." error. This differs from the existing hip specific
flags by pointing to a rocm root install instead of a single directory
with bitcode files.

This tries to maintain compatibility with the existing
--hip-device-lib and --hip-device-lib-path flags, as well as the
HIP_DEVICE_LIB_PATH environment variable, or at least the range of
uses with testcases. The existing range of uses and behavior doesn't
entirely make sense to me, so some of the untested edge cases change
behavior. Currently the two path forms seem to have the double purpose
of a search path for an arbitrary --hip-device-lib, and for finding
the stock set of libraries. Since the stock set of libraries This also
changes the behavior when multiple paths are specified, and only takes
the last one (and the environment variable only handles a single
path).

If --hip-device-lib is used, it now only treats --hip-device-lib-path
as the search path for it, and does not attempt to find the rocm
installation. If not, --hip-device-lib-path and the environment
variable are used as the directory to search instead of the rocm root
based path.

This should also automatically fix handling of the options to use
wave64.

Added: 


Modified: 
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/HIP.cpp
clang/test/Driver/hip-device-libs.hip
clang/test/Driver/rocm-device-libs.cl

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 345a24b10018..fd81fec5f452 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -21,6 +21,67 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;
 
+void RocmInstallationDetector::scanLibDevicePath() {
+  assert(!LibDevicePath.empty());
+
+  const StringRef Suffix(".bc");
+
+  std::error_code EC;
+  for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
+   !EC && LI != LE; LI = LI.increment(EC)) {
+StringRef FilePath = LI->path();
+StringRef FileName = llvm::sys::path::filename(FilePath);
+if (!FileName.endswith(Suffix))
+  continue;
+
+StringRef BaseName = FileName.drop_back(Suffix.size());
+
+if (BaseName == "ocml") {
+  OCML = FilePath;
+} else if (BaseName == "ockl") {
+  OCKL = FilePath;
+} else if (BaseName == "opencl") {
+  OpenCL = FilePath;
+} else if (BaseName == "hip") {
+  HIP = FilePath;
+} else if (BaseName == "oclc_finite_only_off") {
+  FiniteOnly.Off = FilePath;
+} else if (BaseName == "oclc_finite_only_on") {
+  FiniteOnly.On = FilePath;
+} else if (BaseName == "oclc_daz_opt_on") {
+  DenormalsAreZero.On = FilePath;
+} else if (BaseName == "oclc_daz_opt_off") {
+  DenormalsAreZero.Off = FilePath;
+} else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
+  CorrectlyRoundedSqrt.On = FilePath;
+} else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
+  CorrectlyRoundedSqrt.Off = FilePath;
+} else if (BaseName == "oclc_unsafe_math_on") {
+  UnsafeMath.On = FilePath;
+} else if (BaseName == "oclc_unsafe_math_off") {
+  UnsafeMath.Off = FilePath;
+} else if (BaseName == "oclc_wavefrontsize64_on") {
+  WavefrontSize64.On = FilePath;
+} else if (BaseName == "oclc_wavefrontsize64_off") {
+  WavefrontSize64.Off = FilePath;
+} else {
+  // Process all bitcode filenames that look like
+  // ocl_isa_version_XXX.amdgcn.bc
+  const StringRef DeviceLibPrefix = "oclc_isa_version_";
+  if (!BaseName.startswith(DeviceLibPrefix))
+continue;
+
+  StringRef IsaVersionNumber =
+BaseName.drop_front(DeviceLibPrefix.size());
+
+  llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
+  SmallString<8> Tmp;
+  LibDeviceMap.insert(
+std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
+}
+  }
+}
+
 RocmInstallationDetector::RocmInstallationDetector(
 const Driver &D, const llvm::Triple &HostTriple,
 const llvm::opt::ArgList &Args)
@@ -60,6 +121,27 @@ RocmInstallationDetector::RocmInstallationDetector(
 
   bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
 
+  assert(LibDevicePath.empty

[clang] 235fb7d - AMDGPU/OpenCL: Accept -nostdlib in place of -nogpulib

2020-05-14 Thread Matt Arsenault via cfe-commits

Author: Matt Arsenault
Date: 2020-05-14T12:33:31-04:00
New Revision: 235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d

URL: 
https://github.com/llvm/llvm-project/commit/235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d
DIFF: 
https://github.com/llvm/llvm-project/commit/235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d.diff

LOG: AMDGPU/OpenCL: Accept -nostdlib in place of -nogpulib

-nogpulib makes sense when there is a host (where -nostdlib would
 apply) and offload target. Accept nostdlib when there is no offload
 target as an alias.

Added: 
clang/test/Driver/rocm-detect.hip

Modified: 
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/test/Driver/rocm-not-found.cl

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index fd81fec5f452..193ccad98f52 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -361,6 +361,12 @@ void ROCMToolChain::addClangTargetOptions(
   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
  DeviceOffloadingKind);
 
+  // For the OpenCL case where there is no offload target, accept -nostdlib to
+  // disable bitcode linking.
+  if (DeviceOffloadingKind == Action::OFK_None &&
+  DriverArgs.hasArg(options::OPT_nostdlib))
+return;
+
   if (DriverArgs.hasArg(options::OPT_nogpulib))
 return;
 

diff  --git a/clang/test/Driver/rocm-detect.hip 
b/clang/test/Driver/rocm-detect.hip
new file mode 100644
index ..82ed7138098a
--- /dev/null
+++ b/clang/test/Driver/rocm-detect.hip
@@ -0,0 +1,27 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Make sure the appropriate device specific library is available.
+
+// We don't include every target in the test directory, so just pick a valid
+// target not included in the test.
+
+// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
+
+// Should not interpret -nostdlib as disabling offload libraries.
+// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 
-nostdlib \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
+
+
+// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 
-nogpulib \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s
+
+
+// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide 
path to different ROCm installation via --rocm-path, or pass -nogpulib to build 
without linking default libraries.
+
+// NODEFAULTLIBS-NOT: error: cannot find

diff  --git a/clang/test/Driver/rocm-not-found.cl 
b/clang/test/Driver/rocm-not-found.cl
index 49b6c7efcf99..8ecc4b0ef105 100644
--- a/clang/test/Driver/rocm-not-found.cl
+++ b/clang/test/Driver/rocm-not-found.cl
@@ -7,5 +7,7 @@
 // RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 
2>&1 | FileCheck %s --check-prefix ERR
 // ERR: cannot find ROCm installation. Provide its path via --rocm-path, or 
pass -nogpulib.
 
+// Accept nogpulib or nostdlib for OpenCL.
 // RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck 
%s --check-prefix OK
+// RUN: %clang -### -nostdlib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck 
%s --check-prefix OK
 // OK-NOT: cannot find ROCm installation.



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25343: [OpenCL] Mark group functions as noduplicate in opencl-c.h

2016-10-06 Thread Matt Arsenault via cfe-commits
arsenm added a comment.

These should be convergent instead


https://reviews.llvm.org/D25343



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r319735 - AMDGPU: Don't add fp64 feature to r600 subtargets

2017-12-04 Thread Matt Arsenault via cfe-commits
Author: arsenm
Date: Mon Dec  4 19:51:26 2017
New Revision: 319735

URL: http://llvm.org/viewvc/llvm-project?rev=319735&view=rev
Log:
AMDGPU: Don't add fp64 feature to r600 subtargets

Should fix test after r319709

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=319735&r1=319734&r2=319735&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Dec  4 19:51:26 2017
@@ -194,7 +194,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
 case GK_R700_DOUBLE_OPS:
 case GK_EVERGREEN_DOUBLE_OPS:
 case GK_CAYMAN:
-  Features["fp64"] = true;
+  // TODO: Add fp64 when implemented.
   break;
 case GK_NONE:
   return false;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[compiler-rt] [clang-tools-extra] [clang] [llvm] [InferAddressSpaces] Fix constant replace to avoid modifying other functions (PR #70611)

2023-11-07 Thread Matt Arsenault via cfe-commits


@@ -334,6 +335,15 @@ template<> struct simplify_type {
   }
 };
 
+template <> struct GraphTraits {

arsenm wrote:

If we allowed bitcasts between address spaces with the same size, we could drop 
addrspacecast constantexprs altogether 

https://github.com/llvm/llvm-project/pull/70611
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)

2023-11-07 Thread Matt Arsenault via cfe-commits


@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
 llvm::SmallVector Args;
-for (unsigned I = 0; I != E->getNumArgs(); ++I)
-  Args.push_back(EmitScalarExpr(E->getArg(I)));
+for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+  llvm::Value *Arg = EmitScalarExpr(E->getArg(I));
+  // Except first two input operands, all other are imm operands for dpp
+  // intrinsic.
+  if (llvm::is_contained(std::initializer_list{2, 3, 4, 5}, I)) {

arsenm wrote:

Yes, the others can be fixed separately. 

https://github.com/llvm/llvm-project/pull/71139
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)

2023-11-07 Thread Matt Arsenault via cfe-commits


@@ -17632,8 +17632,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned 
BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
 llvm::SmallVector Args;
-for (unsigned I = 0; I != E->getNumArgs(); ++I)
-  Args.push_back(EmitScalarExpr(E->getArg(I)));
+// Find out if any arguments are required to be integer constant

arsenm wrote:

Can you factor the existing generic code into a helper function? This is mostly 
copy paste of the default builtin path 

https://github.com/llvm/llvm-project/pull/71139
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-08 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm requested changes to this pull request.


https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-08 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-08 Thread Matt Arsenault via cfe-commits


@@ -52,6 +52,11 @@ static cl::opt ForceEmitZeroFlag(
   cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0)"),
   cl::init(false), cl::Hidden);
 
+static cl::opt
+PreciseMemOpFlag("amdgpu-precise-memory-op",
+ cl::desc("Emit s_waitcnt 0 after each memory operation"),
+ cl::init(false));
+

arsenm wrote:

I think this should be fused into an enum flag with the existing waitcnt flag.

https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-08 Thread Matt Arsenault via cfe-commits


@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   return HasVMemLoad && UsesVgprLoadedOutside;
 }
 
+bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) {
+  bool Modified = false;
+
+  for (auto &MBB : MF) {

arsenm wrote:

Should try to integrate with the rest of the logic instead of adding a separate 
pass over the function 

https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)

2023-11-08 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.

Could probably golf this down for more sharing with the default path but this 
is a start 

https://github.com/llvm/llvm-project/pull/71139
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] clang: Add pragma clang fp reciprocal (PR #68267)

2023-11-08 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "Revert "[AMDGPU] const-fold imm operands of (PR #71669)

2023-11-08 Thread Matt Arsenault via cfe-commits


@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only 
-nogpuinc -nogpulib\

arsenm wrote:

This should use %clang_cc1, look at some of the other CodeGen* test run lines 
for examples 

https://github.com/llvm/llvm-project/pull/71669
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "Revert "[AMDGPU] const-fold imm operands of (PR #71669)

2023-11-08 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/71669
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[openmp] [clang] [llvm] [OpenMP] Rework handling of global ctor/dtors in OpenMP (PR #71739)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction 
&CGF, const VarDecl &D,
   if (D.isNoDestroy(CGM.getContext()))
 return;
 
+  // OpenMP offloading supports C++ constructors and destructors but we do not
+  // always have 'atexit' available. Instead lower these to use the LLVM global
+  // destructors which we can handle directly in the runtime.
+  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice &&
+  !D.isStaticLocal() &&
+  (CGM.getTriple().isAMDGPU() || CGM.getTriple().isNVPTX()))

arsenm wrote:

Oh look, it's both of my favorite patterns. Can you refine this into something 
better than language X | language Y and AMDGPU || PTX 

https://github.com/llvm/llvm-project/pull/71739
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[compiler-rt] [llvm] [clang-tools-extra] [clang] [InferAddressSpaces] Fix constant replace to avoid modifying other functions (PR #70611)

2023-11-09 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.

I think it would be better if we could eliminate ConstantExpr addrspacecasts 
from the IR altogether, which would avoid most of the complexity here. I would 
also somewhat prefer to push this DFS into a helper function, but can live with 
it inline as-is

https://github.com/llvm/llvm-project/pull/70611
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CodeGen] Implement post-opt linking option for builtin bitocdes (PR #69371)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -113,7 +120,7 @@ class EmitAssemblyHelper {
   const CodeGenOptions &CodeGenOpts;
   const clang::TargetOptions &TargetOpts;
   const LangOptions &LangOpts;
-  Module *TheModule;
+  llvm::Module *TheModule;

arsenm wrote:

Why did this suddenly need qualification?

https://github.com/llvm/llvm-project/pull/69371
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [CodeGen] Implement post-opt linking option for builtin bitocdes (PR #69371)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -98,6 +100,11 @@ extern cl::opt PrintPipelinePasses;
 static cl::opt ClSanitizeOnOptimizerEarlyEP(
 "sanitizer-early-opt-ep", cl::Optional,
 cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false));
+
+// Re-link builtin bitcodes after optimization
+static cl::opt ClRelinkBuiltinBitcodePostop(
+"relink-builtin-bitcode-postop", cl::Optional,
+cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false));

arsenm wrote:

Not a proper flag? Where/how is -mlink-builtin-bitcode defined?

https://github.com/llvm/llvm-project/pull/69371
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   return HasVMemLoad && UsesVgprLoadedOutside;
 }
 
+bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) {
+  bool Modified = false;
+
+  for (auto &MBB : MF) {

arsenm wrote:

I think it makes it harder to reason about the pass as a whole to have it as a 
totally separate phase

https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   return HasVMemLoad && UsesVgprLoadedOutside;
 }
 
+bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) {
+  bool Modified = false;
+
+  for (auto &MBB : MF) {

arsenm wrote:

Plus I think the two separate, but closely related cl::opts is confusing 

https://github.com/llvm/llvm-project/pull/68932
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [openmp] [OpenMP] Rework handling of global ctor/dtors in OpenMP (PR #71739)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction 
&CGF, const VarDecl &D,
   if (D.isNoDestroy(CGM.getContext()))
 return;
 
+  // OpenMP offloading supports C++ constructors and destructors but we do not
+  // always have 'atexit' available. Instead lower these to use the LLVM global
+  // destructors which we can handle directly in the runtime.
+  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice &&
+  !D.isStaticLocal() &&
+  (CGM.getTriple().isAMDGPU() || CGM.getTriple().isNVPTX()))

arsenm wrote:

Would also just hide this in a target/lang predicate that lists these 

https://github.com/llvm/llvm-project/pull/71739
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] clang: Add pragma clang fp reciprocal (PR #68267)

2023-11-09 Thread Matt Arsenault via cfe-commits


@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -emit-llvm -o - %s | 
FileCheck -check-prefixes=CHECK,DEFAULT %s
+// RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -freciprocal-math 
-emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,FLAG %s
+
+float base(float a, float b, float c) {
+// CHECK-LABEL: _Z4basefff
+// FLAG: %[[A:.+]] = fdiv arcp float %b, %c
+// FLAG: %[[M:.+]] = fdiv arcp float %[[A]], %b
+// FLAG-NEXT: fadd arcp float %[[M]], %c
+
+// DEFAULT: %[[A:.+]] = fdiv float %b, %c
+// DEFAULT: %[[M:.+]] = fdiv float %[[A]], %b
+// DEFAULT-NEXT: fadd float %[[M]], %c
+  a = b / c;
+  return a / b + c;
+}
+
+// Simple case
+float fp_recip_simple(float a, float b, float c) {
+// CHECK-LABEL: _Z15fp_recip_simplefff
+// CHECK: %[[A:.+]] = fdiv arcp float %b, %c
+// CHECK: %[[M:.+]] = fdiv arcp float %[[A]], %b
+// CHECK-NEXT: fadd arcp float %[[M]], %c
+#pragma clang fp reciprocal(on)
+  a = b / c;
+  return a / b + c;
+}
+
+// Test interaction with -freciprocal-math
+float fp_recip_disable(float a, float b, float c) {
+// CHECK-LABEL: _Z16fp_recip_disablefff
+// CHECK: %[[A:.+]] = fdiv float %b, %c
+// CHECK: %[[M:.+]] = fdiv float %[[A]], %b
+// CHECK-NEXT: fadd float %[[M]], %c
+#pragma clang fp reciprocal(off)
+  a = b / c;
+  return a / b + c;
+}
+
+float fp_recip_with_reassoc_simple(float a, float b, float c) {
+// CHECK-LABEL: _Z28fp_recip_with_reassoc_simplefff
+// CHECK: %[[A:.+]] = fmul reassoc arcp float %b, %c
+// CHECK: %[[M:.+]] = fdiv reassoc arcp float %b, %[[A]]
+// CHECK-NEXT: fadd reassoc arcp float %[[M]], %c
+#pragma clang fp reciprocal(on) reassociate(on)
+  a = b / c;
+  return a / b + c;
+}
+
+// arcp pragma should only apply to its scope
+float fp_recip_scoped(float a, float b, float c) {
+  // CHECK-LABEL: _Z15fp_recip_scopedfff
+  // DEFAULT: %[[M:.+]] = fdiv float %a, %b
+  // DEFAULT-NEXT: fadd float %[[M]], %c
+  // FLAG: %[[M:.+]] = fdiv arcp float %a, %b
+  // FLAG-NEXT: fadd arcp float %[[M]], %c
+  {
+#pragma clang fp reciprocal(on)
+  }
+  return a / b + c;
+}
+
+// arcp pragma should apply to templates as well
+class Foo {};
+Foo operator+(Foo, Foo);
+template <typename T>
+T template_recip(T a, T b, T c) {
+#pragma clang fp reciprocal(on)
+  return ((a / b) - c) + c;
+}
+
+float fp_recip_template(float a, float b, float c) {
+  // CHECK-LABEL: _Z17fp_recip_templatefff
+  // CHECK: %[[A1:.+]] = fdiv arcp float %a, %b
+  // CHECK-NEXT: %[[A2:.+]] = fsub arcp float %[[A1]], %c
+  // CHECK-NEXT: fadd arcp float %[[A2]], %c
+  return template_recip(a, b, c);
+}
+
+// File Scoping should work across functions
+#pragma clang fp reciprocal(on)
+float fp_file_scope_on(float a, float b, float c) {
+  // CHECK-LABEL: _Z16fp_file_scope_onfff
+  // CHECK: %[[M1:.+]] = fdiv arcp float %a, %c
+  // CHECK-NEXT: %[[M2:.+]] = fdiv arcp float %b, %c
+  // CHECK-NEXT: fadd arcp float %[[M1]], %[[M2]]
+  return (a / c) + (b / c);
+}
+
+// Inner pragma has precedence
+float fp_file_scope_stop(float a, float b, float c) {
+  // CHECK-LABEL: _Z18fp_file_scope_stopfff
+  // CHECK: %[[A:.+]] = fdiv arcp float %a, %a
+  // CHECK: %[[M1:.+]] = fdiv float %[[A]], %c
+  // CHECK-NEXT: %[[M2:.+]] = fdiv float %b, %c
+  // CHECK-NEXT: fsub float %[[M1]], %[[M2]]
+  a = a / a;
+  {
+#pragma clang fp reciprocal(off)
+return (a / c) - (b / c);
+  }
+}
+
+#pragma clang fp reciprocal(off)
+float fp_recip_off(float a, float b, float c) {
+  // CHECK-LABEL: _Z12fp_recip_of
+  // CHECK: %[[D1:.+]] = fdiv float %a, %c
+  // CHECK-NEXT: %[[D2:.+]] = fdiv float %b, %c
+  // CHECK-NEXT: fadd float %[[D1]], %[[D2]]
+  return (a / c) + (b / c);
+}
+
+// Takes latest flag
+float fp_recip_many(float a, float b, float c) {
+// CHECK-LABEL: _Z13fp_recip_manyfff
+// CHECK: %[[D1:.+]] = fdiv arcp float %a, %c
+// CHECK-NEXT: %[[D2:.+]] = fdiv arcp float %b, %c
+// CHECK-NEXT: fadd arcp float %[[D1]], %[[D2]]
+#pragma clang fp reciprocal(off) reciprocal(on)
+  return (a / c) + (b / c);
+}
+
+// Pragma does not propagate through called functions
+float helper_func(float a, float b, float c) { return a + b + c; }
+float fp_recip_call_helper(float a, float b, float c) {
+// CHECK-LABEL: _Z20fp_recip_call_helperfff
+// CHECK: %[[S1:.+]] = fadd float %a, %b
+// CHECK-NEXT: fadd float %[[S1]], %c
+#pragma clang fp reciprocal(on)
+  return helper_func(a, b, c);
+}

arsenm wrote:

That's already here on line 44 

https://github.com/llvm/llvm-project/pull/68267
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)

2023-11-12 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72072
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)

2023-11-12 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/72072
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -2227,14 +2227,6 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
 llvm::Value *V = CE->changesVolatileQualification()
  ? EmitLoadOfLValue(CE)
  : Visit(const_cast<Expr *>(E));
-if (V) {
-  // CK_NoOp can model a pointer qualification conversion, which can remove
-  // an array bound and change the IR type.
-  // FIXME: Once pointee types are removed from IR, remove this.
-  llvm::Type *T = ConvertType(DestTy);
-  if (T != V->getType())
-V = Builder.CreateBitCast(V, T);
-}
 return V;

arsenm wrote:

Could fold this into a direct return CE->...

https://github.com/llvm/llvm-project/pull/72072
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm commented:

128-bit division should already work, we have an IR integer division expansion 
for > 64-bit divides. I think moving towards getting the infrastructure to a 
place where we can link in compiler-rt binaries is a good thing, but I don't 
think we're in a position to actually enable that at this time. We still don't 
have everything necessary to provide object linking, which this seems to rely 
on 

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[compiler-rt] [clang] [llvm] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[compiler-rt] [llvm] [clang] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {
+  // If parsing as bitcode failed, clear the error and try to parse as an
+  // archive.
+  handleAllErrors(ModuleOrErr.takeError(),
+  [&](const llvm::ErrorInfoBase &EIB) {});
+
+  Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+  llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+  if (!BinOrErr) {
+handleAllErrors(BinOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  
CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+LinkModules.clear();
+return true;
+  }
+
+  std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+  if (Bin->isArchive()) {
+llvm::object::Archive *Archive =
+llvm::cast<llvm::object::Archive>(Bin.get());
+Error Err = Error::success();
+
+for (auto &Child : Archive->children(Err)) {
+  Expected<llvm::MemoryBufferRef> ChildBufOrErr =
+  Child.getMemoryBufferRef();
+  if (!ChildBufOrErr) {

arsenm wrote:

Can you add some tests for the various error cases?

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {
+  // If parsing as bitcode failed, clear the error and try to parse as an
+  // archive.
+  handleAllErrors(ModuleOrErr.takeError(),
+  [&](const llvm::ErrorInfoBase &EIB) {});
+
+  Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+  llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+  if (!BinOrErr) {
+handleAllErrors(BinOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  
CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+LinkModules.clear();
+return true;
+  }
+
+  std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+  if (Bin->isArchive()) {
+llvm::object::Archive *Archive =
+llvm::cast<llvm::object::Archive>(Bin.get());
+Error Err = Error::success();
+
+for (auto &Child : Archive->children(Err)) {
+  Expected<llvm::MemoryBufferRef> ChildBufOrErr =
+  Child.getMemoryBufferRef();
+  if (!ChildBufOrErr) {
+handleAllErrors(
+ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) 
{
+  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+continue;
+  }
+  auto ChildBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+  ChildBufOrErr->getBuffer(), 
ChildBufOrErr->getBufferIdentifier());
+
+  if (!ChildBuffer) {
+handleAllErrors(
+ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) 
{
+  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+continue;
+  }
+
+  Expected<std::unique_ptr<llvm::Module>> ChildModuleOrErr =
+  getOwningLazyBitcodeModule(std::move(ChildBuffer), *VMContext);
+  if (!ChildModuleOrErr) {
+handleAllErrors(
+ChildModuleOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+continue;
+  }
+
+  LinkModules.push_back({std::move(ChildModuleOrErr.get()),

arsenm wrote:

Not sure you need the .get()

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {

arsenm wrote:

no else after continue 

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [compiler-rt] [llvm] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {
+  // If parsing as bitcode failed, clear the error and try to parse as an
+  // archive.
+  handleAllErrors(ModuleOrErr.takeError(),
+  [&](const llvm::ErrorInfoBase &EIB) {});
+
+  Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+  llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+  if (!BinOrErr) {
+handleAllErrors(BinOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  
CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+LinkModules.clear();
+return true;
+  }
+
+  std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+  if (Bin->isArchive()) {

arsenm wrote:

Can you split all of this out into an archive handling helper function?

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {
+  // If parsing as bitcode failed, clear the error and try to parse as an
+  // archive.
+  handleAllErrors(ModuleOrErr.takeError(),
+  [&](const llvm::ErrorInfoBase &EIB) {});
+
+  Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+  llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+  if (!BinOrErr) {
+handleAllErrors(BinOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  
CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+LinkModules.clear();
+return true;
+  }
+
+  std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+  if (Bin->isArchive()) {
+llvm::object::Archive *Archive =
+llvm::cast<llvm::object::Archive>(Bin.get());
+Error Err = Error::success();

arsenm wrote:

I assume this doesn't require initialization 

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -596,6 +596,7 @@ static bool mustPreserveGV(const GlobalValue &GV) {
   if (const Function *F = dyn_cast<Function>(&GV))
 return F->isDeclaration() || F->getName().startswith("__asan_") ||
F->getName().startswith("__sanitizer_") ||
+   F->getName() == "__divti3" ||

arsenm wrote:

we're stuck preserving this in the IR at all times which isn't really ideal 

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance 
&CI) {
 
   for (const CodeGenOptions::BitcodeFileToLink &F :
CI.getCodeGenOpts().LinkBitcodeFiles) {
-auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
-if (!BCBuf) {
+
+auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
+if (!BCBufOrErr) {
   CI.getDiagnostics().Report(diag::err_cannot_open_file)
-  << F.Filename << BCBuf.getError().message();
+  << F.Filename << BCBufOrErr.getError().message();
   LinkModules.clear();
   return true;
 }
 
+auto &BCBuf = *BCBufOrErr;
+
 Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
-getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
-if (!ModuleOrErr) {
-  handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
+
+if (ModuleOrErr) {
+  LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
+ F.Internalize, F.LinkFlags});
+  continue;
+} else {
+  // If parsing as bitcode failed, clear the error and try to parse as an
+  // archive.
+  handleAllErrors(ModuleOrErr.takeError(),
+  [&](const llvm::ErrorInfoBase &EIB) {});
+
+  Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
+  llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
+
+  if (!BinOrErr) {
+handleAllErrors(BinOrErr.takeError(),
+[&](const llvm::ErrorInfoBase &EIB) {
+  
CI.getDiagnostics().Report(diag::err_cannot_open_file)
+  << F.Filename << EIB.message();
+});
+LinkModules.clear();
+return true;
+  }
+
+  std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
+
+  if (Bin->isArchive()) {
+llvm::object::Archive *Archive =
+llvm::cast<llvm::object::Archive>(Bin.get());
+Error Err = Error::success();
+
+for (auto &Child : Archive->children(Err)) {
+  Expected<llvm::MemoryBufferRef> ChildBufOrErr =
+  Child.getMemoryBufferRef();
+  if (!ChildBufOrErr) {

arsenm wrote:

Also the base case, a driver test would help me see what this is actually doing

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)

2023-11-12 Thread Matt Arsenault via cfe-commits


@@ -3630,10 +3631,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo 
&CLI,
 
   std::vector<SDValue> Ops;
   Ops.push_back(Chain);
+  bool AddTargetGlobalAddr = true;
+  // Try to find the callee in the current module.
+  if (isa<ExternalSymbolSDNode>(Callee)) {
+Callee = DAG.getSymbolFunctionGlobalAddress(Callee);
+AddTargetGlobalAddr = false;
+  }

arsenm wrote:

This should be split into a separate backend only change 

https://github.com/llvm/llvm-project/pull/71978
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] clang: Add pragma clang fp reciprocal (PR #68267)

2023-11-13 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/68267

>From f09d8efdcbb5ffb9cd39d686205a120b6a82a01b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 31 Aug 2023 17:33:35 -0400
Subject: [PATCH] clang: Add pragma clang fp reciprocal

Just follow along with the reassociate pragma. This
allows locally setting the arcp fast math flag. Previously you
could only access this through the global -freciprocal-math.
---
 clang/docs/LanguageExtensions.rst |  16 +++
 clang/docs/ReleaseNotes.rst   |   2 +
 .../clang/Basic/DiagnosticParseKinds.td   |   3 +-
 .../clang/Basic/DiagnosticSemaKinds.td|   2 +-
 clang/include/clang/Basic/PragmaKinds.h   |   8 ++
 clang/include/clang/Sema/Sema.h   |   5 +-
 clang/lib/Parse/ParsePragma.cpp   |  51 ---
 clang/lib/Sema/SemaAttr.cpp   |  18 ++-
 clang/test/CodeGen/fp-reciprocal-pragma.cpp   | 130 ++
 clang/test/Parser/pragma-fp-contract.c|  15 ++
 clang/test/Parser/pragma-fp.cpp   |   4 +-
 .../test/Sema/eval-method-with-unsafe-math.c  |  32 +
 12 files changed, 256 insertions(+), 30 deletions(-)
 create mode 100644 clang/test/CodeGen/fp-reciprocal-pragma.cpp

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 30e288f986782fd..090600275956be0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4617,6 +4617,22 @@ The pragma can take two values: ``on`` and ``off``.
 float v = t + z;
   }
 
+``#pragma clang fp reciprocal`` allows control over using reciprocal
+approximations in floating point expressions. When enabled, this
+pragma allows the expression ``x / y`` to be approximated as ``x *
+(1.0 / y)``.  This pragma can be used to disable reciprocal
+approximation when it is otherwise enabled for the translation unit
+with the ``-freciprocal-math`` flag or other fast-math options. The
+pragma can take two values: ``on`` and ``off``.
+
+.. code-block:: c++
+
+  float f(float x, float y)
+  {
+// Enable floating point reciprocal approximation
+#pragma clang fp reciprocal(on)
+return x / y;
+  }
 
 ``#pragma clang fp contract`` specifies whether the compiler should
 contract a multiply and an addition (or subtraction) into a fused FMA
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 74358219ba9fb22..a8b68fb8c3ee486 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -218,6 +218,8 @@ Non-comprehensive list of changes in this release
   For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not 
known at compile-time and is
   determined at runtime.
 
+* Added ``#pragma clang fp reciprocal``.
+
 New Compiler Flags
 --
 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td 
b/clang/include/clang/Basic/DiagnosticParseKinds.td
index de180344fcc5c74..2f3bef33f936883 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1569,12 +1569,13 @@ def note_pragma_loop_invalid_vectorize_option : Note<
   "vectorize_width(X, scalable) where X is an integer, or 
vectorize_width('fixed' or 'scalable')">;
 
 def err_pragma_fp_invalid_option : Error<
-  "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract', 
'reassociate' or 'exceptions'">;
+  "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract', 
'reassociate', 'reciprocal', or 'exceptions'">;
 def err_pragma_fp_invalid_argument : Error<
   "unexpected argument '%0' to '#pragma clang fp %1'; expected "
   "%select{"
   "'fast' or 'on' or 'off'|"
   "'on' or 'off'|"
+  "'on' or 'off'|"
   "'ignore', 'maytrap' or 'strict'|"
   "'source', 'double' or 'extended'}2">;
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4614324babb1c91..19f027848b177dc 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6755,7 +6755,7 @@ def warn_floatingpoint_eq : Warning<
 
 def err_setting_eval_method_used_in_unsafe_context : Error <
   "%select{'#pragma clang fp eval_method'|option 'ffp-eval-method'}0 cannot be 
used with "
-  "%select{option 'fapprox-func'|option 'mreassociate'|option 
'freciprocal'|option 'ffp-eval-method'|'#pragma clang fp reassociate'}1">;
+  "%select{option 'fapprox-func'|option 'mreassociate'|option 
'freciprocal'|option 'ffp-eval-method'|'#pragma clang fp reassociate'|'#pragma 
clang fp reciprocal'}1">;
 
 def warn_remainder_division_by_zero : Warning<
   "%select{remainder|division}0 by zero is undefined">,
diff --git a/clang/include/clang/Basic/PragmaKinds.h 
b/clang/include/clang/Basic/PragmaKinds.h
index 176bbc9ac7caaec..42f049f7323d2d4 100644
--- a/clang/include/clang/Basic/PragmaKinds.h
+++ b/clang/include/clang/Basic/PragmaKinds.h
@@ -34,6 +34,14 @@ enum Pragm

[clang] [llvm] [flang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-06 Thread Matt Arsenault via cfe-commits


@@ -0,0 +1,31 @@
+//=== AMDGPUAddrSpace.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+/// \file
+/// AMDGPU address space definition
+///
+//
+//===--===//
+
+#ifndef LLVM_SUPPORT_AMDGPUADDRSPACE_H
+#define LLVM_SUPPORT_AMDGPUADDRSPACE_H
+
+namespace llvm {
+namespace AMDGPU {
+enum class AddrSpace {

arsenm wrote:

So this is still a second copy of the address space enum, another copy of which 
exists in AMDGPU.h (which does not use enum class, and uses different names). 
What's the plan to consolidate these?

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][AMDGPU] Update amdgpu_waves_per_eu attr docs (PR #74587)

2023-12-06 Thread Matt Arsenault via cfe-commits


@@ -2659,8 +2659,9 @@ An error will be given if:
   - Specified values violate subtarget specifications;
   - Specified values are not compatible with values provided through other
 attributes;
-  - The AMDGPU target backend is unable to create machine code that can meet 
the
-request.
+
+The AMDGPU target backend will emit a warning whenever it is unable to

arsenm wrote:

This looks indented differently, without the list - as the previous comment did 

https://github.com/llvm/llvm-project/pull/74587
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][AMDGPU] Update amdgpu_waves_per_eu attr docs (PR #74587)

2023-12-07 Thread Matt Arsenault via cfe-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/74587
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   3   4   5   6   7   8   9   10   >