r339109 - AMDGPU: Add builtin for s_dcache_inv_vol
Author: arsenm Date: Tue Aug 7 00:49:04 2018 New Revision: 339109 URL: http://llvm.org/viewvc/llvm-project?rev=339109&view=rev Log: AMDGPU: Add builtin for s_dcache_inv_vol Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339109&r1=339108&r2=339109&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Aug 7 00:49:04 2018 @@ -101,6 +101,11 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") //===--===// +// CI+ only builtins. +//===--===// +TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts") + +//===--===// // VI+ only builtins. 
//===--===// Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339109&r1=339108&r2=339109&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug 7 00:49:04 2018 @@ -148,12 +148,14 @@ bool AMDGPUTargetInfo::initFeatureMap( Features["16-bit-insts"] = true; Features["dpp"] = true; Features["s-memrealtime"] = true; - break; + LLVM_FALLTHROUGH; case GK_GFX704: case GK_GFX703: case GK_GFX702: case GK_GFX701: case GK_GFX700: + Features["ci-insts"] = true; + LLVM_FALLTHROUGH; case GK_GFX601: case GK_GFX600: break; Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=339109&r1=339108&r2=339109&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Tue Aug 7 00:49:04 2018 @@ -5,8 +5,16 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" -// GFX906: "target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" +// GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime" +// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" +// GFX600: "target-features"="+fp32-denormals,+fp64-fp16-denormals" +// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" kernel void test() {} Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=339109&view=auto == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Tue Aug 7 00:49:04 2018 @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s + +//
r339110 - AMDGPU: Add builtin for s_dcache_wb
Author: arsenm Date: Tue Aug 7 00:49:13 2018 New Revision: 339110 URL: http://llvm.org/viewvc/llvm-project?rev=339110&view=rev Log: AMDGPU: Add builtin for s_dcache_wb Added: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339110&r1=339109&r2=339110&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Aug 7 00:49:13 2018 @@ -121,6 +121,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fracth, TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime") TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp") +TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts") //===--===// // GFX9+ only builtins. 
Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339110&r1=339109&r2=339110&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug 7 00:49:13 2018 @@ -145,6 +145,7 @@ bool AMDGPUTargetInfo::initFeatureMap( case GK_GFX803: case GK_GFX802: case GK_GFX801: + Features["vi-insts"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; Features["s-memrealtime"] = true; Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=339110&r1=339109&r2=339110&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Tue Aug 7 00:49:13 2018 @@ -10,9 +10,9 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts" // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" // GFX600: 
"target-features"="+fp32-denormals,+fp64-fp16-denormals" // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl?rev=339110&r1=339109&r2=339110&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Tue Aug 7 00:49:13 2018 @@ -82,6 +82,13 @@ void test_s_memrealtime(global ulong* ou *out = __builtin_amdgcn_s_memrealtime(); } +// CHECK-LABEL: @test_s_dcache_wb() +// CHECK: call void @llvm.amdgcn.s.dcache.wb() +void test_s_dcache_wb() +{ + __builtin_amdgcn_s_dcache_wb(); +} + // CHECK-LABEL: @test_mov_dpp // CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false) void test_mov_dpp(global int* out, int src) Added: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl?rev=339110&view=auto == --- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl (added) +++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-vi.cl Tue Aug 7 00:49:13 2018 @@ -0,0 +1,8 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
r339278 - AMDGPU: Fix enabling denormals by default on pre-VI targets
Author: arsenm Date: Wed Aug 8 10:48:37 2018 New Revision: 339278 URL: http://llvm.org/viewvc/llvm-project?rev=339278&view=rev Log: AMDGPU: Fix enabling denormals by default on pre-VI targets Fast FMAF is not a sufficient condition to enable denormals. Before VI, enabling denormals caused F32 instructions to run at F64 speeds. Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=339278&r1=339277&r2=339278&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Wed Aug 8 10:48:37 2018 @@ -210,7 +210,8 @@ void AMDGPUTargetInfo::adjustTargetOptio } if (!hasFP32Denormals) TargetOpts.Features.push_back( -(Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm +(Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms && + !CGOpts.FlushDenorm ? 
'+' : '-') + Twine("fp32-denormals")) Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=339278&r1=339277&r2=339278&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Wed Aug 8 10:48:37 2018 @@ -94,77 +94,78 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg bool HasLDEXPF; bool HasFP64; bool HasFastFMA; +bool HasFullRateF32Denorms; }; static constexpr GPUInfo InvalidGPU = -{{""}, {""}, GK_NONE, false, false, false, false, false}; +{{""}, {""}, GK_NONE, false, false, false, false, false, false}; static constexpr GPUInfo R600GPUs[26] = { - // Name CanonicalKindHasHasHasHasHas - // Name FMAF Fast LDEXPF FP64 Fast - // FMAF FMA -{{"r600"},{"r600"},GK_R600,false, false, false, false, false}, -{{"rv630"}, {"r600"},GK_R600,false, false, false, false, false}, -{{"rv635"}, {"r600"},GK_R600,false, false, false, false, false}, -{{"r630"},{"r630"},GK_R630,false, false, false, false, false}, -{{"rs780"}, {"rs880"}, GK_RS880, false, false, false, false, false}, -{{"rs880"}, {"rs880"}, GK_RS880, false, false, false, false, false}, -{{"rv610"}, {"rs880"}, GK_RS880, false, false, false, false, false}, -{{"rv620"}, {"rs880"}, GK_RS880, false, false, false, false, false}, -{{"rv670"}, {"rv670"}, GK_RV670, false, false, false, false, false}, -{{"rv710"}, {"rv710"}, GK_RV710, false, false, false, false, false}, -{{"rv730"}, {"rv730"}, GK_RV730, false, false, false, false, false}, -{{"rv740"}, {"rv770"}, GK_RV770, false, false, false, false, false}, -{{"rv770"}, {"rv770"}, GK_RV770, false, false, false, false, false}, -{{"cedar"}, {"cedar"}, GK_CEDAR, false, false, false, false, false}, -{{"palm"},{"cedar"}, GK_CEDAR, false, false, false, false, false}, -{{"cypress"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false}, -{{"hemlock"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false}, -{{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, 
false, false}, -{{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false}, -{{"sumo"},{"sumo"},GK_SUMO,false, false, false, false, false}, -{{"sumo2"}, {"sumo"},GK_SUMO,false, false, false, false, false}, -{{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false}, -{{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false}, -{{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false}, -{{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false}, -{{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false}, + // Name CanonicalKindHasHasHasHasHas Has + // Name FMAF Fast LDEXPF FP64 Fast Fast + // FMAF FMA Denorm +{{"r600"},{"r600"},GK_R600,false, false, false, false, false, false}, +{{"rv630"}, {"r600"},GK_R600,false, false, false, false, false, false}, +{{"rv635"}, {"r600"},GK_R600,false, false, false, false, false, false}, +{{"r630"},{"r630"},GK_R630,false, false, false, false, false, false}, +{{"rs780"}, {"rs880"}, GK_RS8
r339395 - AMDGPU: Add another missing builtin
Author: arsenm Date: Thu Aug 9 15:18:37 2018 New Revision: 339395 URL: http://llvm.org/viewvc/llvm-project?rev=339395&view=rev Log: AMDGPU: Add another missing builtin Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=339395&r1=339394&r2=339395&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug 9 15:18:37 2018 @@ -104,6 +104,7 @@ BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3 // CI+ only builtins. //===--===// TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts") +TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts") //===--===// // VI+ only builtins. Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=339395&r1=339394&r2=339395&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Thu Aug 9 15:18:37 2018 @@ -10,3 +10,10 @@ void test_s_dcache_inv_vol() __builtin_amdgcn_s_dcache_inv_vol(); } +// CHECK-LABEL: @test_buffer_wbinvl1_vol +// CHECK: call void @llvm.amdgcn.buffer.wbinvl1.vol() +void test_buffer_wbinvl1_vol() +{ + __builtin_amdgcn_buffer_wbinvl1_vol(); +} + Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl?rev=339395&r1=339394&r2=339395&view=diff == --- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl (original) +++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Thu Aug 9 15:18:37 2018 @@ -1,7 +1,8 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-- 
-target-cpu tahiti -verify -S -o - %s -void test_ci_s_dcache_inv_vol() +void test_ci_biltins() { __builtin_amdgcn_s_dcache_inv_vol(); // expected-error {{'__builtin_amdgcn_s_dcache_inv_vol' needs target feature ci-insts}} + __builtin_amdgcn_buffer_wbinvl1_vol(); // expected-error {{'__builtin_amdgcn_buffer_wbinvl1_vol' needs target feature ci-insts}} } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r339934 - AMDGPU: Correct errors in device table
Author: arsenm Date: Thu Aug 16 13:19:47 2018 New Revision: 339934 URL: http://llvm.org/viewvc/llvm-project?rev=339934&view=rev Log: AMDGPU: Correct errors in device table Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=339934&r1=339933&r2=339934&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Thu Aug 16 13:19:47 2018 @@ -125,7 +125,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg {{"sumo"},{"sumo"},GK_SUMO,false, false, false, false, false, false}, {{"sumo2"}, {"sumo"},GK_SUMO,false, false, false, false, false, false}, {{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false, false}, -{{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false, false}, +{{"caicos"}, {"caicos"}, GK_CAICOS, false, false, false, false, false, false}, {{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false}, {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false}, {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false, false}, @@ -163,7 +163,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg {{"gfx810"},{"gfx810"}, GK_GFX810, true, false, true, true, true, true}, {{"stoney"},{"gfx810"}, GK_GFX810, true, false, true, true, true, true}, {{"gfx900"},{"gfx900"}, GK_GFX900, true, true, true, true, true, true}, -{{"gfx902"},{"gfx902"}, GK_GFX900, true, true, true, true, true, true}, +{{"gfx902"},{"gfx902"}, GK_GFX902, true, true, true, true, true, true}, {{"gfx904"},{"gfx904"}, GK_GFX904, true, true, true, true, true, true}, {{"gfx906"},{"gfx906"}, GK_GFX906, true, true, true, true, true, true}, }; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r340193 - Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode
Author: arsenm Date: Mon Aug 20 11:16:48 2018 New Revision: 340193 URL: http://llvm.org/viewvc/llvm-project?rev=340193&view=rev Log: Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. Modified: cfe/trunk/include/clang/Driver/CC1Options.td cfe/trunk/lib/Driver/ToolChains/Cuda.cpp cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu cfe/trunk/test/CodeGenCUDA/propagate-metadata.cu cfe/trunk/test/Driver/cuda-detect.cu cfe/trunk/test/Driver/openmp-offload-gpu.c Modified: cfe/trunk/include/clang/Driver/CC1Options.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/CC1Options.td?rev=340193&r1=340192&r2=340193&view=diff == --- cfe/trunk/include/clang/Driver/CC1Options.td (original) +++ cfe/trunk/include/clang/Driver/CC1Options.td Mon Aug 20 11:16:48 2018 @@ -287,9 +287,11 @@ def mconstructor_aliases : Flag<["-"], " HelpText<"Emit complete constructors and destructors as aliases when possible">; def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; -def mlink_cuda_bitcode : Separate<["-"], "mlink-cuda-bitcode">, +def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, HelpText<"Link and internalize needed symbols from the given bitcode file " "before performing optimizations.">; +def mlink_cuda_bitcode : Separate<["-"], "mlink-cuda-bitcode">, + Alias<mlink_builtin_bitcode>; def vectorize_loops : Flag<["-"], "vectorize-loops">, HelpText<"Run the Loop vectorization passes">; def vectorize_slp : Flag<["-"], "vectorize-slp">, Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=340193&r1=340192&r2=340193&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Mon Aug 20 11:16:48 2018 @@ 
-621,7 +621,7 @@ void CudaToolChain::addClangTargetOption return; } - CC1Args.push_back("-mlink-cuda-bitcode"); + CC1Args.push_back("-mlink-builtin-bitcode"); CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM @@ -667,7 +667,7 @@ void CudaToolChain::addClangTargetOption SmallString<128> LibOmpTargetFile(LibraryPath); llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName); if (llvm::sys::fs::exists(LibOmpTargetFile)) { -CC1Args.push_back("-mlink-cuda-bitcode"); +CC1Args.push_back("-mlink-builtin-bitcode"); CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile)); FoundBCLibrary = true; break; Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=340193&r1=340192&r2=340193&view=diff == --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original) +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Mon Aug 20 11:16:48 2018 @@ -912,10 +912,10 @@ static bool ParseCodeGenArgs(CodeGenOpti Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations); Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); for (auto *A : - Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) { + Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) { CodeGenOptions::BitcodeFileToLink F; F.Filename = A->getValue(); -if (A->getOption().matches(OPT_mlink_cuda_bitcode)) { +if (A->getOption().matches(OPT_mlink_builtin_bitcode)) { F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded; // When linking CUDA bitcode, propagate function attributes so that // e.g. libdevice gets fast-math attrs if we're building with fast-math. 
Modified: cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu?rev=340193&r1=340192&r2=340193&view=diff == --- cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu (original) +++ cfe/trunk/test/CodeGenCUDA/link-device-bitcode.cu Mon Aug 20 11:16:48 2018 @@ -11,13 +11,19 @@ // // Make sure function in device-code gets linked in and internalized. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN:-mlink-builtin-bitcode %t.bc -emit-llvm \ +// RUN:-disable-llvm-passes -o - %s \ +// RUN:| FileCheck %s -check-prefix CHECK-IR + +// Make sure legacy flag name works +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-d
r340292 - AMDGPU: Move target code into TargetParser
Author: arsenm Date: Tue Aug 21 09:13:29 2018 New Revision: 340292 URL: http://llvm.org/viewvc/llvm-project?rev=340292&view=rev Log: AMDGPU: Move target code into TargetParser Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=340292&r1=340291&r2=340292&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Aug 21 09:13:29 2018 @@ -127,12 +127,14 @@ bool AMDGPUTargetInfo::initFeatureMap( llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector<std::string> &FeatureVec) const { + using namespace llvm::AMDGPU; + // XXX - What does the member GPU mean if device name string passed here? if (isAMDGCN(getTriple())) { if (CPU.empty()) CPU = "gfx600"; -switch (parseAMDGCNName(CPU).Kind) { +switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { case GK_GFX906: Features["dl-insts"] = true; LLVM_FALLTHROUGH; @@ -169,7 +171,7 @@ bool AMDGPUTargetInfo::initFeatureMap( if (CPU.empty()) CPU = "r600"; -switch (parseR600Name(CPU).Kind) { +switch (llvm::AMDGPU::parseArchR600(CPU)) { case GK_CAYMAN: case GK_CYPRESS: case GK_RV770: @@ -201,7 +203,7 @@ void AMDGPUTargetInfo::adjustTargetOptio TargetOptions &TargetOpts) const { bool hasFP32Denormals = false; bool hasFP64Denormals = false; - GPUInfo CGOptsGPU = parseGPUName(TargetOpts.CPU); + for (auto &I : TargetOpts.FeaturesAsWritten) { if (I == "+fp32-denormals" || I == "-fp32-denormals") hasFP32Denormals = true; @@ -210,54 +212,20 @@ void AMDGPUTargetInfo::adjustTargetOptio } if (!hasFP32Denormals) TargetOpts.Features.push_back( -(Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms && - !CGOpts.FlushDenorm - ? '+' - : '-') + - Twine("fp32-denormals")) + (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm + ? 
'+' : '-') + Twine("fp32-denormals")) .str()); // Always do not flush fp64 or fp16 denorms. - if (!hasFP64Denormals && CGOptsGPU.HasFP64) + if (!hasFP64Denormals && hasFP64()) TargetOpts.Features.push_back("+fp64-fp16-denormals"); } -constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::InvalidGPU; -constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::R600GPUs[]; -constexpr AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::AMDGCNGPUs[]; - -AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseR600Name(StringRef Name) { - const auto *Result = llvm::find_if( - R600GPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); - - if (Result == std::end(R600GPUs)) -return InvalidGPU; - return *Result; -} - -AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseAMDGCNName(StringRef Name) { - const auto *Result = llvm::find_if( - AMDGCNGPUs, [Name](const GPUInfo &GPU) { return GPU.Name == Name; }); - - if (Result == std::end(AMDGCNGPUs)) -return InvalidGPU; - return *Result; -} - -AMDGPUTargetInfo::GPUInfo AMDGPUTargetInfo::parseGPUName(StringRef Name) const { - if (isAMDGCN(getTriple())) -return parseAMDGCNName(Name); - else -return parseR600Name(Name); -} - void AMDGPUTargetInfo::fillValidCPUList( SmallVectorImpl<StringRef> &Values) const { if (isAMDGCN(getTriple())) -llvm::for_each(AMDGCNGPUs, [&Values](const GPUInfo &GPU) { - Values.emplace_back(GPU.Name);}); +llvm::AMDGPU::fillValidArchListAMDGCN(Values); else -llvm::for_each(R600GPUs, [&Values](const GPUInfo &GPU) { - Values.emplace_back(GPU.Name);}); +llvm::AMDGPU::fillValidArchListR600(Values); } void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { @@ -267,7 +235,12 @@ void AMDGPUTargetInfo::setAddressSpaceMa AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : TargetInfo(Triple), - GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) { + GPUKind(isAMDGCN(Triple) ? 
+ llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : + llvm::AMDGPU::parseArchR600(Opts.CPU)), + GPUFeatures(isAMDGCN(Triple) ? + llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : + llvm::AMDGPU::getArchAttrR600(GPUKind)) { resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN : DataLayoutStringR600); assert(DataLayout->getAllocaAddrSpace() == Private); @@ -312,19 +285,22 @@ void AMDGPUTargetInfo::getTargetDefines( else Builder.defineMacro("__R600__"); - if (GPU.Kind != GK_NONE) -Builder.defineMacro(Twine("__") + Twine(GPU.C
r315094 - OpenCL: Assume functions are convergent
Author: arsenm Date: Fri Oct 6 12:34:40 2017 New Revision: 315094 URL: http://llvm.org/viewvc/llvm-project?rev=315094&view=rev Log: OpenCL: Assume functions are convergent This was done for CUDA functions in r261779, and for the same reason this also needs to be done for OpenCL. An arbitrary function could have a barrier() call in it, which in turn requires the calling function to be convergent. Modified: cfe/trunk/include/clang/Basic/LangOptions.h cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl cfe/trunk/test/CodeGenOpenCL/convergent.cl Modified: cfe/trunk/include/clang/Basic/LangOptions.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.h?rev=315094&r1=315093&r2=315094&view=diff == --- cfe/trunk/include/clang/Basic/LangOptions.h (original) +++ cfe/trunk/include/clang/Basic/LangOptions.h Fri Oct 6 12:34:40 2017 @@ -197,6 +197,10 @@ public: bool allowsNonTrivialObjCLifetimeQualifiers() const { return ObjCAutoRefCount || ObjCWeak; } + + bool assumeFunctionsAreConvergent() const { +return (CUDA && CUDAIsDevice) || OpenCL; + } }; /// \brief Floating point control options Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=315094&r1=315093&r2=315094&view=diff == --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Oct 6 12:34:40 2017 @@ -1750,13 +1750,16 @@ void CodeGenModule::ConstructDefaultFnAt FuncAttrs.addAttribute("backchain"); } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { -// Conservatively, mark all functions and calls in CUDA as convergent -// (meaning, they may call an intrinsically convergent op, such as -// __syncthreads(), and so can't have certain optimizations applied around -// them). LLVM will remove this attribute where it safely can. 
+ if (getLangOpts().assumeFunctionsAreConvergent()) { +// Conservatively, mark all functions and calls in CUDA and OpenCL as +// convergent (meaning, they may call an intrinsically convergent op, such +// as __syncthreads() / barrier(), and so can't have certain optimizations +// applied around them). LLVM will remove this attribute where it safely +// can. FuncAttrs.addAttribute(llvm::Attribute::Convergent); + } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Exceptions aren't supported in CUDA device code. FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=315094&r1=315093&r2=315094&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Fri Oct 6 12:34:40 2017 @@ -151,28 +151,28 @@ kernel void reqd_work_group_size_32_2_1_ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = 
{ convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-fla
r315219 - AMDGPU: Fix missing declaration for __builtin_amdgcn_dispatch_ptr
Author: arsenm Date: Mon Oct 9 10:44:18 2017 New Revision: 315219 URL: http://llvm.org/viewvc/llvm-project?rev=315219&view=rev Log: AMDGPU: Fix missing declaration for __builtin_amdgcn_dispatch_ptr Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=315219&r1=315218&r2=315219&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Oct 9 10:44:18 2017 @@ -21,6 +21,7 @@ // SI+ only builtins. //===--===// +BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*2", "nc") BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc") BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc") Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=315219&r1=315218&r2=315219&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Oct 9 10:44:18 2017 @@ -421,6 +421,13 @@ void test_read_exec(global ulong* out) { // CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]] +// CHECK-LABEL: @test_dispatch_ptr +// CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() +void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out) +{ + *out = __builtin_amdgcn_dispatch_ptr(); +} + // CHECK-LABEL: @test_kernarg_segment_ptr // CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out) ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r315238 - AMDGPU: Add read_exec_lo/hi builtins
Author: arsenm Date: Mon Oct 9 13:06:37 2017 New Revision: 315238 URL: http://llvm.org/viewvc/llvm-project?rev=315238&view=rev Log: AMDGPU: Add read_exec_lo/hi builtins Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=315238&r1=315237&r2=315238&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Oct 9 13:06:37 2017 @@ -121,6 +121,8 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, // Special builtins. //===--===// BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") +BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc") +BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc") //===--===// // R600-NI only builtins. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315238&r1=315237&r2=315238&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Oct 9 13:06:37 2017 @@ -9103,6 +9103,15 @@ Value *CodeGenFunction::EmitAMDGPUBuilti CI->setConvergent(); return CI; } + case AMDGPU::BI__builtin_amdgcn_read_exec_lo: + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { +StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? 
+ "exec_lo" : "exec_hi"; +CallInst *CI = cast( + EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); +CI->setConvergent(); +return CI; + } // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=315238&r1=315237&r2=315238&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Oct 9 13:06:37 2017 @@ -421,6 +421,18 @@ void test_read_exec(global ulong* out) { // CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]] +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_LO:[0-9]+]]) #[[READ_EXEC_ATTRS]] +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_HI:[0-9]+]]) #[[READ_EXEC_ATTRS]] +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + // CHECK-LABEL: @test_dispatch_ptr // CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out) @@ -499,3 +511,5 @@ void test_s_getpc(global ulong* out) // CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[EXEC]] = !{!"exec"} +// CHECK-DAG: ![[EXEC_LO]] = !{!"exec_lo"} +// CHECK-DAG: ![[EXEC_HI]] = !{!"exec_hi"} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r341033 - AMDGPU: Default to hidden visibility
Author: arsenm Date: Thu Aug 30 01:18:06 2018 New Revision: 341033 URL: http://llvm.org/viewvc/llvm-project?rev=341033&view=rev Log: AMDGPU: Default to hidden visibility Object linking isn't supported, so it's not useful to emit default visibility. Default visibility requires relocations we don't yet support for functions compiled in another translation unit. WebAssembly already does this, although they insert these arguments in a different place for some reason. Added: cfe/trunk/test/Driver/amdgpu-visibility.cl Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp cfe/trunk/lib/Driver/ToolChains/AMDGPU.h Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp?rev=341033&r1=341032&r2=341033&view=diff == --- cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp Thu Aug 30 01:18:06 2018 @@ -98,3 +98,16 @@ AMDGPUToolChain::TranslateArgs(const Der return DAL; } + +void AMDGPUToolChain::addClangTargetOptions( +const llvm::opt::ArgList &DriverArgs, +llvm::opt::ArgStringList &CC1Args, +Action::OffloadKind DeviceOffloadingKind) const { + // Default to "hidden" visibility, as object level linking will not be + // supported for the forseeable future. 
+ if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat)) { +CC1Args.push_back("-fvisibility"); +CC1Args.push_back("hidden"); + } +} Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.h?rev=341033&r1=341032&r2=341033&view=diff == --- cfe/trunk/lib/Driver/ToolChains/AMDGPU.h (original) +++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.h Thu Aug 30 01:18:06 2018 @@ -61,6 +61,10 @@ public: llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; + + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, + Action::OffloadKind DeviceOffloadKind) const override; }; } // end namespace toolchains Added: cfe/trunk/test/Driver/amdgpu-visibility.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-visibility.cl?rev=341033&view=auto == --- cfe/trunk/test/Driver/amdgpu-visibility.cl (added) +++ cfe/trunk/test/Driver/amdgpu-visibility.cl Thu Aug 30 01:18:06 2018 @@ -0,0 +1,7 @@ +// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm %s 2>&1 | FileCheck -check-prefix=DEFAULT %s +// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility=protected %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED %s +// RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility-ms-compat %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS %s + +// DEFAULT: "-fvisibility" "hidden" +// OVERRIDE-PROTECTED: "-fvisibility" "protected" +// OVERRIDE-MS: "-fvisibility" "hidden" "-ftype-visibility" "default" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r338707 - Try to make builtin address space declarations not useless
Author: arsenm Date: Thu Aug 2 05:14:28 2018 New Revision: 338707 URL: http://llvm.org/viewvc/llvm-project?rev=338707&view=rev Log: Try to make builtin address space declarations not useless The way address space declarations for builtins currently work is nearly useless. The code assumes the address space used for builtins is a confusingly named "target address space" from user code using __attribute__((address_space(N))) that matches the builtin declaration. There's no way to use this to declare a builtin that returns a language specific address space. The terminology used is highly confusing since it has nothing to do with the address space selected by the target to use for a language address space. This feature is essentially unused as-is. AMDGPU and NVPTX are the only in-tree targets attempting to use this. The AMDGPU builtins certainly do not behave as intended (i.e. all of the builtins returning pointers can never compile because the numbered address space never matches the expected named address space). The NVPTX builtins are missing tests for some, and the others seem to rely on an implicit addrspacecast. Change the used address space for builtins based on a target hook to allow using a language address space for a builtin. This allows the same builtin declaration to be used for multiple languages with similarly purposed address spaces (e.g. the same AMDGPU builtin can be used in OpenCL and CUDA even though the constant address spaces are arbitrarily different). This breaks the possibility of using arbitrary numbered address spaces alongside the named address spaces for builtins. If this is an issue we probably need to introduce another builtin declaration character to distinguish language address spaces from so-called "target address spaces". 
Added: cfe/trunk/test/CodeGenCUDA/builtins-amdgcn.cu cfe/trunk/test/CodeGenOpenCL/numbered-address-space.cl cfe/trunk/test/SemaOpenCL/numbered-address-space.cl Modified: cfe/trunk/include/clang/AST/ASTContext.h cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/include/clang/Basic/TargetInfo.h cfe/trunk/lib/AST/ASTContext.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Sema/SemaExpr.cpp cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/AST/ASTContext.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTContext.h?rev=338707&r1=338706&r2=338707&view=diff == --- cfe/trunk/include/clang/AST/ASTContext.h (original) +++ cfe/trunk/include/clang/AST/ASTContext.h Thu Aug 2 05:14:28 2018 @@ -2488,6 +2488,8 @@ public: unsigned getTargetAddressSpace(LangAS AS) const; + LangAS getLangASForBuiltinAddressSpace(unsigned AS) const; + /// Get target-dependent integer value for null pointer which is used for /// constant folding. uint64_t getTargetNullPointerValue(QualType QT) const; Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=338707&r1=338706&r2=338707&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug 2 05:14:28 2018 @@ -21,9 +21,9 @@ // SI+ only builtins. 
//===--===// -BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc") -BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc") -BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc") +BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc") +BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc") +BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc") @@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v", BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n") BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n") BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") + +// FIXME: Need to disallow constant address space. BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n") BUILTIN(__builtin_amdgcn_div_fmas, "b", "nc") @@ -93,9 +95,9 @@ BUILTIN(__builtin_amdgcn_ds_bpermute, "i BUILTIN(__builtin_amdgcn_readfirstlane, "ii", "nc") BUILTIN(__builtin_amdgcn_readlane, "iii", "nc") BUILTIN(__builtin_amdgcn_fmed3f, "", "nc") -BUILTIN(__builtin_amdgcn_ds_faddf, "ff*fIiIiIb", "n") -BUILTIN(__builtin_amdgcn_ds_fminf, "ff*fIiIiIb", "n") -BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") //===---
r338754 - AMDGPU: Fix missing declaration of queue ptr builtin
Author: arsenm Date: Thu Aug 2 11:24:55 2018 New Revision: 338754 URL: http://llvm.org/viewvc/llvm-project?rev=338754&view=rev Log: AMDGPU: Fix missing declaration of queue ptr builtin Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=338754&r1=338753&r2=338754&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Aug 2 11:24:55 2018 @@ -24,6 +24,7 @@ BUILTIN(__builtin_amdgcn_dispatch_ptr, "v*4", "nc") BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "v*4", "nc") BUILTIN(__builtin_amdgcn_implicitarg_ptr, "v*4", "nc") +BUILTIN(__builtin_amdgcn_queue_ptr, "v*4", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc") BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc") Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=338754&r1=338753&r2=338754&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Thu Aug 2 11:24:55 2018 @@ -462,6 +462,13 @@ void test_dispatch_ptr(__constant unsign *out = __builtin_amdgcn_dispatch_ptr(); } +// CHECK-LABEL: @test_queue_ptr +// CHECK: call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() +void test_queue_ptr(__constant unsigned char ** out) +{ + *out = __builtin_amdgcn_queue_ptr(); +} + // CHECK-LABEL: @test_kernarg_segment_ptr // CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() void test_kernarg_segment_ptr(__constant unsigned char ** out) ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r324641 - Fix crash on array initializer with non-0 alloca addrspace
Author: arsenm Date: Thu Feb 8 11:37:09 2018 New Revision: 324641 URL: http://llvm.org/viewvc/llvm-project?rev=324641&view=rev Log: Fix crash on array initializer with non-0 alloca addrspace Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=324641&r1=324640&r2=324641&view=diff == --- cfe/trunk/lib/CodeGen/CGDecl.cpp (original) +++ cfe/trunk/lib/CodeGen/CGDecl.cpp Thu Feb 8 11:37:09 2018 @@ -1337,7 +1337,8 @@ void CodeGenFunction::EmitAutoVarInit(co isVolatile); // Zero and undef don't require a stores. if (!constant->isNullValue() && !isa(constant)) { - Loc = Builder.CreateBitCast(Loc, constant->getType()->getPointerTo()); + Loc = Builder.CreateBitCast(Loc, +constant->getType()->getPointerTo(Loc.getAddressSpace())); emitStoresForInitAfterMemset(constant, Loc.getPointer(), isVolatile, Builder); } Modified: cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl?rev=324641&r1=324640&r2=324641&view=diff == --- cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl Thu Feb 8 11:37:09 2018 @@ -20,3 +20,13 @@ __constant ConstantArrayPointerStruct co &constant_array_struct.f }; +__kernel void initializer_cast_is_valid_crash() +{ + unsigned char v512[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00 + }; + +} Modified: 
cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl?rev=324641&r1=324640&r2=324641&view=diff == --- cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl Thu Feb 8 11:37:09 2018 @@ -1,9 +1,33 @@ -// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE0 %s +// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE5 %s // CHECK: @test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4 void test() { __private int arr[] = {1, 2, 3}; -// CHECK: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8* -// CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false) +// PRIVATE0: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8* +// PRIVATE0: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false) + +// PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5) +// PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)* +// PRIVATE5: call void @llvm.memcpy.p5i8.p2i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i64 12, i1 false) +} + +__kernel void initializer_cast_is_valid_crash() { +// PRIVATE0: %v512 = alloca [64 x i8], align 1 +// PRIVATE0: %0 = bitcast [64 x i8]* %v512 to i8* +// PRIVATE0: call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 64, i1 false) +// PRIVATE0: %1 = bitcast i8* %0 to [64 x i8]* + + +// PRIVATE5: 
%v512 = alloca [64 x i8], align 1, addrspace(5) +// PRIVATE5: %0 = bitcast [64 x i8] addrspace(5)* %v512 to i8 addrspace(5)* +// PRIVATE5: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 1 %0, i8 0, i64 64, i1 false) +// PRIVATE5: %1 = bitcast i8 addrspace(5)* %0 to [64 x i8] addrspace(5)* + unsigned char v512[64] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x
r324748 - AMDGPU: Update for datalayout change
Author: arsenm Date: Fri Feb 9 08:58:41 2018 New Revision: 324748 URL: http://llvm.org/viewvc/llvm-project?rev=324748&view=rev Log: AMDGPU: Update for datalayout change Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/test/CodeGen/target-data.c cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=324748&r1=324747&r2=324748&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Fri Feb 9 08:58:41 2018 @@ -33,12 +33,12 @@ static const char *const DataLayoutStrin "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; static const char *const DataLayoutStringSIPrivateIsZero = -"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" +"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; static const char *const DataLayoutStringSIGenericIsZero = -"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" +"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; @@ -144,7 +144,7 @@ const char *const AMDGPUTargetInfo::GCCR "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", - "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", + "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", "flat_scratch_lo", "flat_scratch_hi" }; Modified: cfe/trunk/test/CodeGen/target-data.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-data.c?rev=324748&r1=324747&r2=324748&view=diff == --- cfe/trunk/test/CodeGen/target-data.c (original) +++ 
cfe/trunk/test/CodeGen/target-data.c Fri Feb 9 08:58:41 2018 @@ -132,12 +132,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" // Test default -target-cpu // RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SIDefault -// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=AARCH64 Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl?rev=324748&r1=324747&r2=324748&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Fri Feb 9 08:58:41 2018 @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s -// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +// CHECK: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" void foo(void) {} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r352443 - AMDGPU: Add ds append/consume builtins
Author: arsenm Date: Mon Jan 28 15:59:18 2019 New Revision: 352443 URL: http://llvm.org/viewvc/llvm-project?rev=352443&view=rev Log: AMDGPU: Add ds append/consume builtins Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=352443&r1=352442&r2=352443&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Jan 28 15:59:18 2019 @@ -98,6 +98,8 @@ BUILTIN(__builtin_amdgcn_fmed3f, "", BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") +BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") //===--===// // CI+ only builtins. Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=352443&r1=352442&r2=352443&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jan 28 15:59:18 2019 @@ -12478,6 +12478,14 @@ Value *CodeGenFunction::EmitAMDGPUBuilti case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { +Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 
+ Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; +Value *Src0 = EmitScalarExpr(E->getArg(0)); +Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); +return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=352443&r1=352442&r2=352443&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Jan 28 15:59:18 2019 @@ -536,6 +536,18 @@ void test_s_getpc(global ulong* out) *out = __builtin_amdgcn_s_getpc(); } +// CHECK-LABEL: @test_ds_append_lds( +// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_append_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_append(ptr); +} + +// CHECK-LABEL: @test_ds_consume_lds( +// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_consume_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_consume(ptr); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: r350970 - [Darwin][Driver] Don't pass a file as object_path_lto during ThinLTO
> On Jan 11, 2019, at 4:16 PM, Steven Wu via cfe-commits > wrote: > > Author: steven_wu > Date: Fri Jan 11 13:16:04 2019 > New Revision: 350970 > > URL: http://llvm.org/viewvc/llvm-project?rev=350970&view=rev > Log: > [Darwin][Driver] Don't pass a file as object_path_lto during ThinLTO > > Summary: > After r327851, Driver::GetTemporaryPath will create the file rather than > just create a potentially unique filename. If clang driver passes the > file as parameter as -object_path_lto, ld64 will pass it back to libLTO > as GeneratedObjectsDirectory, which is going to cause an LLVM ERROR if it > is not a directory. > Now during thinLTO, pass a temp directory path to linker instead. > > rdar://problem/47194182 > > Reviewers: arphaman, dexonsmith > > Reviewed By: arphaman > > Subscribers: mehdi_amini, inglorion, jkorous, cfe-commits > > Differential Revision: https://reviews.llvm.org/D56608 > > Modified: >cfe/trunk/include/clang/Driver/Driver.h >cfe/trunk/lib/Driver/Driver.cpp >cfe/trunk/lib/Driver/ToolChains/Darwin.cpp >cfe/trunk/test/Driver/darwin-ld-lto.c > > Modified: cfe/trunk/include/clang/Driver/Driver.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Driver.h?rev=350970&r1=350969&r2=350970&view=diff > == > --- cfe/trunk/include/clang/Driver/Driver.h (original) > +++ cfe/trunk/include/clang/Driver/Driver.h Fri Jan 11 13:16:04 2019 > @@ -505,6 +505,10 @@ public: > /// GCC goes to extra lengths here to be a bit more robust. > std::string GetTemporaryPath(StringRef Prefix, StringRef Suffix) const; > > + /// GetTemporaryDirectory - Return the pathname of a temporary directory to > + /// use as part of compilation; the directory will have the given prefix. > + std::string GetTemporaryDirectory(StringRef Prefix) const; > + > /// Return the pathname of the pch file in clang-cl mode. 
> std::string GetClPchPath(Compilation &C, StringRef BaseName) const; > > > Modified: cfe/trunk/lib/Driver/Driver.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=350970&r1=350969&r2=350970&view=diff > == > --- cfe/trunk/lib/Driver/Driver.cpp (original) > +++ cfe/trunk/lib/Driver/Driver.cpp Fri Jan 11 13:16:04 2019 > @@ -4478,6 +4478,17 @@ std::string Driver::GetTemporaryPath(Str > return Path.str(); > } > > +std::string Driver::GetTemporaryDirectory(StringRef Prefix) const { > + SmallString<128> Path; > + std::error_code EC = llvm::sys::fs::createUniqueDirectory(Prefix, Path); > + if (EC) { > +Diag(clang::diag::err_unable_to_make_temp) << EC.message(); > +return ""; > + } > + > + return Path.str(); > +} > + > std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const { > SmallString<128> Output; > if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) { > > Modified: cfe/trunk/lib/Driver/ToolChains/Darwin.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Darwin.cpp?rev=350970&r1=350969&r2=350970&view=diff > == > --- cfe/trunk/lib/Driver/ToolChains/Darwin.cpp (original) > +++ cfe/trunk/lib/Driver/ToolChains/Darwin.cpp Fri Jan 11 13:16:04 2019 > @@ -224,13 +224,20 @@ void darwin::Linker::AddLinkArgs(Compila >options::OPT_fno_application_extension, false)) > CmdArgs.push_back("-application_extension"); > > - if (D.isUsingLTO()) { > -// If we are using LTO, then automatically create a temporary file path > for > -// the linker to use, so that it's lifetime will extend past a possible > -// dsymutil step. 
> -if (Version[0] >= 116 && NeedsTempPath(Inputs)) { > - const char *TmpPath = C.getArgs().MakeArgString( > - D.GetTemporaryPath("cc", > types::getTypeTempSuffix(types::TY_Object))); > + if (D.isUsingLTO() && Version[0] >= 116 && NeedsTempPath(Inputs)) { > +std::string TmpPathName; > +if (D.getLTOMode() == LTOK_Full) { > + // If we are using full LTO, then automatically create a temporary file > + // path for the linker to use, so that it's lifetime will extend past a > + // possible dsymutil step. > + TmpPathName = > + D.GetTemporaryPath("cc", > types::getTypeTempSuffix(types::TY_Object)); > +} else if (D.getLTOMode() == LTOK_Thin) > + // If we are using thin LTO, then create a directory instead. > + TmpPathName = D.GetTemporaryDirectory("thinlto"); > + > +if (!TmpPathName.empty()) { > + auto *TmpPath = C.getArgs().MakeArgString(TmpPathName); > C.addTempFile(TmpPath); > CmdArgs.push_back("-object_path_lto"); > CmdArgs.push_back(TmpPath); > > Modified: cfe/trunk/test/Driver/darwin-ld-lto.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/darwin-ld-lto.c?rev=350970&r1=350969
r352539 - Revert "OpenCL: Extend argument promotion rules to vector types"
Author: arsenm Date: Tue Jan 29 12:49:47 2019 New Revision: 352539 URL: http://llvm.org/viewvc/llvm-project?rev=352539&view=rev Log: Revert "OpenCL: Extend argument promotion rules to vector types" This reverts r348083. This was based on a misreading of the spec for printf specifiers. Also revert r343653, as without a subsequent patch, a correctly specified format for a vector will incorrectly warn. Fixes bug 40491. Modified: cfe/trunk/lib/Headers/opencl-c.h cfe/trunk/lib/Sema/SemaExpr.cpp cfe/trunk/test/CodeGenOpenCL/printf.cl cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Modified: cfe/trunk/lib/Headers/opencl-c.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/opencl-c.h?rev=352539&r1=352538&r2=352539&view=diff == --- cfe/trunk/lib/Headers/opencl-c.h (original) +++ cfe/trunk/lib/Headers/opencl-c.h Tue Jan 29 12:49:47 2019 @@ -14469,7 +14469,7 @@ half16 __ovld __cnfn shuffle2(half16 x, #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf -int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); +int printf(__constant const char* st, ...); #endif // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=352539&r1=352538&r2=352539&view=diff == --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Jan 29 12:49:47 2019 @@ -737,33 +737,20 @@ ExprResult Sema::DefaultArgumentPromotio return ExprError(); E = Res.get(); - QualType ScalarTy = Ty; - unsigned NumElts = 0; - if (const ExtVectorType *VecTy = Ty->getAs()) { -NumElts = VecTy->getNumElements(); -ScalarTy = VecTy->getElementType(); - } - // If this is a 'float' or '__fp16' (CVR qualified or typedef) // promote to double. 
// Note that default argument promotion applies only to float (and // half/fp16); it does not apply to _Float16. - const BuiltinType *BTy = ScalarTy->getAs<BuiltinType>(); + const BuiltinType *BTy = Ty->getAs<BuiltinType>(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) { if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { - if (BTy->getKind() == BuiltinType::Half) { -QualType Ty = Context.FloatTy; -if (NumElts != 0) - Ty = Context.getExtVectorType(Ty, NumElts); -E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); - } +if (BTy->getKind() == BuiltinType::Half) { +E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); +} } else { - QualType Ty = Context.DoubleTy; - if (NumElts != 0) -Ty = Context.getExtVectorType(Ty, NumElts); - E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); + E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); } } Modified: cfe/trunk/test/CodeGenOpenCL/printf.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/printf.cl?rev=352539&r1=352538&r2=352539&view=diff == --- cfe/trunk/test/CodeGenOpenCL/printf.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/printf.cl Tue Jan 29 12:49:47 2019 @@ -12,28 +12,26 @@ int printf(__constant const char* st, .. // ALL-LABEL: @test_printf_float2( -// FP64: %conv = fpext <2 x float> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) -// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) + +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) 
@printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) kernel void test_printf_float2(float2 arg) { - printf("%v2f", arg); + printf("%v2hlf", arg); } // ALL-LABEL: @test_printf_half2( -// FP64: %conv = fpext <2 x half> %0 to <2 x double> -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2 +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.s
r352540 - OpenCL: Use length modifier for warning on vector printf arguments
Author: arsenm Date: Tue Jan 29 12:49:54 2019 New Revision: 352540 URL: http://llvm.org/viewvc/llvm-project?rev=352540&view=rev Log: OpenCL: Use length modifier for warning on vector printf arguments Re-enable format string warnings on printf. The warnings are still incomplete. Apparently it is undefined to use a vector specifier without a length modifier, which is not currently warned on. Additionally, type warnings appear to not be working with the hh modifier, and aren't warning on all of the special restrictions from c99 printf. Modified: cfe/trunk/include/clang/AST/FormatString.h cfe/trunk/lib/AST/FormatString.cpp cfe/trunk/lib/AST/PrintfFormatString.cpp cfe/trunk/lib/AST/ScanfFormatString.cpp cfe/trunk/lib/Headers/opencl-c.h cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/Sema/format-strings.c cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Modified: cfe/trunk/include/clang/AST/FormatString.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=352540&r1=352539&r2=352540&view=diff == --- cfe/trunk/include/clang/AST/FormatString.h (original) +++ cfe/trunk/include/clang/AST/FormatString.h Tue Jan 29 12:49:54 2019 @@ -67,6 +67,7 @@ public: None, AsChar, // 'hh' AsShort, // 'h' +AsShortLong, // 'hl' (OpenCL float/int vector element) AsLong, // 'l' AsLongLong, // 'll' AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) @@ -436,7 +437,8 @@ public: bool usesPositionalArg() const { return UsesPositionalArg; } - bool hasValidLengthModifier(const TargetInfo &Target) const; + bool hasValidLengthModifier(const TargetInfo &Target, + const LangOptions &LO) const; bool hasStandardLengthModifier() const; Modified: cfe/trunk/lib/AST/FormatString.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=352540&r1=352539&r2=352540&view=diff == --- cfe/trunk/lib/AST/FormatString.cpp (original) +++ 
cfe/trunk/lib/AST/FormatString.cpp Tue Jan 29 12:49:54 2019 @@ -223,6 +223,9 @@ clang::analyze_format_string::ParseLengt if (I != E && *I == 'h') { ++I; lmKind = LengthModifier::AsChar; + } else if (I != E && *I == 'l' && LO.OpenCL) { +++I; +lmKind = LengthModifier::AsShortLong; } else { lmKind = LengthModifier::AsShort; } @@ -487,7 +490,8 @@ ArgType::matchesType(ASTContext &C, Qual } ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { - if (K != SpecificTy) // Won't be a valid vector element type. + // Check for valid vector element types. + if (T.isNull()) return ArgType::Invalid(); QualType Vec = C.getExtVectorType(T, NumElts); @@ -572,6 +576,8 @@ analyze_format_string::LengthModifier::t return "hh"; case AsShort: return "h"; + case AsShortLong: +return "hl"; case AsLong: // or AsWideChar return "l"; case AsLongLong: @@ -707,13 +713,18 @@ void OptionalAmount::toString(raw_ostrea } } -bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target) const { +bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, + const LangOptions &LO) const { switch (LM.getKind()) { case LengthModifier::None: return true; // Handle most integer flags case LengthModifier::AsShort: + // Length modifier only applies to FP vectors. + if (LO.OpenCL && CS.isDoubleArg()) +return !VectorNumElts.isInvalid(); + if (Target.getTriple().isOSMSVCRT()) { switch (CS.getKind()) { case ConversionSpecifier::cArg: @@ -752,8 +763,18 @@ bool FormatSpecifier::hasValidLengthModi return false; } +case LengthModifier::AsShortLong: + return LO.OpenCL && !VectorNumElts.isInvalid(); + // Handle 'l' flag case LengthModifier::AsLong: // or AsWideChar + if (CS.isDoubleArg()) { +// Invalid for OpenCL FP scalars. 
+if (LO.OpenCL && VectorNumElts.isInvalid()) + return false; +return true; + } + switch (CS.getKind()) { case ConversionSpecifier::dArg: case ConversionSpecifier::DArg: @@ -764,14 +785,6 @@ bool FormatSpecifier::hasValidLengthModi case ConversionSpecifier::UArg: case ConversionSpecifier::xArg: case ConversionSpecifier::XArg: -case ConversionSpecifier::aArg: -case ConversionSpecifier::AArg: -case ConversionSpecifier::fArg: -case ConversionSpecifier::FArg: -case ConversionSpecifier::eArg: -case ConversionSpecifier::EArg: -case
r352544 - OpenCL: Try to fix bot test failure
Author: arsenm Date: Tue Jan 29 13:14:56 2019 New Revision: 352544 URL: http://llvm.org/viewvc/llvm-project?rev=352544&view=rev Log: OpenCL: Try to fix bot test failure Modified: cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl Modified: cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl?rev=352544&r1=352543&r2=352544&view=diff == --- cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl (original) +++ cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl Tue Jan 29 13:14:56 2019 @@ -3,6 +3,8 @@ // RUN: %clang_cc1 -cl-std=CL1.2 -fsyntax-only -pedantic -Wall -Werror %t // RUN: %clang_cc1 -cl-std=CL1.2 -E -o - %t | FileCheck %s +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + typedef __attribute__((ext_vector_type(4))) char char4; typedef __attribute__((ext_vector_type(4))) short short4; typedef __attribute__((ext_vector_type(4))) int int4; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r346806 - OpenCL: Don't warn on v printf modifier
Author: arsenm Date: Tue Nov 13 14:30:35 2018 New Revision: 346806 URL: http://llvm.org/viewvc/llvm-project?rev=346806&view=rev Log: OpenCL: Don't warn on v printf modifier This avoids spurious warnings, but could use a lot of work. For example the number of vector elements is not verified, and the passed value type is not checked. Fixes bug 39486 Added: cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Modified: cfe/trunk/include/clang/AST/FormatString.h cfe/trunk/lib/AST/FormatString.cpp cfe/trunk/lib/AST/PrintfFormatString.cpp cfe/trunk/test/Sema/format-strings.c Modified: cfe/trunk/include/clang/AST/FormatString.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=346806&r1=346805&r2=346806&view=diff == --- cfe/trunk/include/clang/AST/FormatString.h (original) +++ cfe/trunk/include/clang/AST/FormatString.h Tue Nov 13 14:30:35 2018 @@ -166,6 +166,8 @@ public: ZArg, // MS extension +VArg, // OpenCL vectors + // Objective-C specific specifiers. ObjCObjArg, // '@' ObjCBeg = ObjCObjArg, Modified: cfe/trunk/lib/AST/FormatString.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=346806&r1=346805&r2=346806&view=diff == --- cfe/trunk/lib/AST/FormatString.cpp (original) +++ cfe/trunk/lib/AST/FormatString.cpp Tue Nov 13 14:30:35 2018 @@ -618,6 +618,9 @@ const char *ConversionSpecifier::toStrin // MS specific specifiers. case ZArg: return "Z"; + + // OpenCL specific specifiers. 
+ case VArg: return "v"; } return nullptr; } @@ -875,6 +878,8 @@ bool FormatSpecifier::hasStandardConvers case ConversionSpecifier::CArg: case ConversionSpecifier::SArg: return LangOpt.ObjC; +case ConversionSpecifier::VArg: + return LangOpt.OpenCL; case ConversionSpecifier::InvalidSpecifier: case ConversionSpecifier::FreeBSDbArg: case ConversionSpecifier::FreeBSDDArg: Modified: cfe/trunk/lib/AST/PrintfFormatString.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/PrintfFormatString.cpp?rev=346806&r1=346805&r2=346806&view=diff == --- cfe/trunk/lib/AST/PrintfFormatString.cpp (original) +++ cfe/trunk/lib/AST/PrintfFormatString.cpp Tue Nov 13 14:30:35 2018 @@ -362,6 +362,12 @@ static PrintfSpecifierResult ParsePrintf case 'Z': if (Target.getTriple().isOSMSVCRT()) k = ConversionSpecifier::ZArg; + break; +// OpenCL specific. +case 'v': + if (LO.OpenCL) +k = ConversionSpecifier::VArg; + break; } // Check to see if we used the Objective-C modifier flags with @@ -1026,6 +1032,7 @@ bool PrintfSpecifier::hasValidPrecision( case ConversionSpecifier::FreeBSDrArg: case ConversionSpecifier::FreeBSDyArg: case ConversionSpecifier::PArg: + case ConversionSpecifier::VArg: return true; default: Modified: cfe/trunk/test/Sema/format-strings.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings.c?rev=346806&r1=346805&r2=346806&view=diff == --- cfe/trunk/test/Sema/format-strings.c (original) +++ cfe/trunk/test/Sema/format-strings.c Tue Nov 13 14:30:35 2018 @@ -613,6 +613,11 @@ void pr12761(char c) { printf("%hhx", c); } +void test_opencl_vector_format(int x) { + printf("%v4d", x); // expected-warning{{invalid conversion specifier 'v'}} + printf("%vd", x); // expected-warning{{invalid conversion specifier 'v'}} + printf("%0vd", x); // expected-warning{{invalid conversion specifier 'v'}} +} // Test that we correctly merge the format in both orders. extern void test14_foo(const char *, const char *, ...) 
Added: cfe/trunk/test/SemaOpenCL/printf-format-strings.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/printf-format-strings.cl?rev=346806&view=auto == --- cfe/trunk/test/SemaOpenCL/printf-format-strings.cl (added) +++ cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Tue Nov 13 14:30:35 2018 @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -cl-std=CL1.2 -fsyntax-only -verify %s + +typedef __attribute__((ext_vector_type(2))) float float2; +typedef __attribute__((ext_vector_type(4))) float float4; +typedef __attribute__((ext_vector_type(4))) int int4; + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + +kernel void format_v4f32(float4 arg) +{ +printf("%v4f\n", arg); // expected-no-diagnostics +} + +kernel void format_v4f32_wrong_num_elts(float2 arg) +{ +printf("%v4f\n", arg); // expected-no-diagnostics +} + +kernel void vector_precision_modifier_v4f32(float4 arg) +{ +printf("%.2v4f\n", arg); // expected-no-diagnostics +} +
r347873 - Mark __builtin_shufflevector as using custom type checking
Author: arsenm Date: Thu Nov 29 07:45:05 2018 New Revision: 347873 URL: http://llvm.org/viewvc/llvm-project?rev=347873&view=rev Log: Mark __builtin_shufflevector as using custom type checking The custom handling seems to all be implemented already. This avoids regressions in a future patch when float vectors are ordinarily promoted to double vectors in variadic calls. Modified: cfe/trunk/include/clang/Basic/Builtins.def Modified: cfe/trunk/include/clang/Basic/Builtins.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=347873&r1=347872&r2=347873&view=diff == --- cfe/trunk/include/clang/Basic/Builtins.def (original) +++ cfe/trunk/include/clang/Basic/Builtins.def Thu Nov 29 07:45:05 2018 @@ -538,7 +538,7 @@ BUILTIN(__builtin_readcyclecounter, "ULL BUILTIN(__builtin_trap, "v", "nr") BUILTIN(__builtin_debugtrap, "v", "n") BUILTIN(__builtin_unreachable, "v", "nr") -BUILTIN(__builtin_shufflevector, "v." , "nc") +BUILTIN(__builtin_shufflevector, "v." , "nct") BUILTIN(__builtin_convertvector, "v." , "nct") BUILTIN(__builtin_alloca, "v*z" , "Fn") BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn") ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r348083 - OpenCL: Extend argument promotion rules to vector types
Author: arsenm Date: Sat Dec 1 13:56:10 2018 New Revision: 348083 URL: http://llvm.org/viewvc/llvm-project?rev=348083&view=rev Log: OpenCL: Extend argument promotion rules to vector types The spec is ambiguous on whether vector types are allowed to be implicitly converted. The only legal context I think this can be used for OpenCL is printf, where it seems necessary. Added: cfe/trunk/test/CodeGenOpenCL/printf.cl Modified: cfe/trunk/lib/Sema/SemaExpr.cpp Modified: cfe/trunk/lib/Sema/SemaExpr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=348083&r1=348082&r2=348083&view=diff == --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) +++ cfe/trunk/lib/Sema/SemaExpr.cpp Sat Dec 1 13:56:10 2018 @@ -730,20 +730,33 @@ ExprResult Sema::DefaultArgumentPromotio return ExprError(); E = Res.get(); + QualType ScalarTy = Ty; + unsigned NumElts = 0; + if (const ExtVectorType *VecTy = Ty->getAs<ExtVectorType>()) { +NumElts = VecTy->getNumElements(); +ScalarTy = VecTy->getElementType(); + } + // If this is a 'float' or '__fp16' (CVR qualified or typedef) // promote to double. // Note that default argument promotion applies only to float (and // half/fp16); it does not apply to _Float16. 
- const BuiltinType *BTy = Ty->getAs<BuiltinType>(); + const BuiltinType *BTy = ScalarTy->getAs<BuiltinType>(); if (BTy && (BTy->getKind() == BuiltinType::Half || BTy->getKind() == BuiltinType::Float)) { if (getLangOpts().OpenCL && !getOpenCLOptions().isEnabled("cl_khr_fp64")) { -if (BTy->getKind() == BuiltinType::Half) { -E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get(); -} + if (BTy->getKind() == BuiltinType::Half) { +QualType Ty = Context.FloatTy; +if (NumElts != 0) + Ty = Context.getExtVectorType(Ty, NumElts); +E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); + } } else { - E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get(); + QualType Ty = Context.DoubleTy; + if (NumElts != 0) +Ty = Context.getExtVectorType(Ty, NumElts); + E = ImpCastExprToType(E, Ty, CK_FloatingCast).get(); } } Added: cfe/trunk/test/CodeGenOpenCL/printf.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/printf.cl?rev=348083&view=auto == --- cfe/trunk/test/CodeGenOpenCL/printf.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/printf.cl Sat Dec 1 13:56:10 2018 @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s +// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s + +typedef __attribute__((ext_vector_type(2))) float float2; +typedef __attribute__((ext_vector_type(2))) half half2; + +#ifdef cl_khr_fp64 +typedef __attribute__((ext_vector_type(2))) double double2; +#endif + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + + +// ALL-LABEL: @test_printf_float2( +// FP64: %conv = fpext <2 x float> %0 to <2 x double> +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) 
@printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) + +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) +kernel void test_printf_float2(float2 arg) { + printf("%v2f", arg); +} + +// ALL-LABEL: @test_printf_half2( +// FP64: %conv = fpext <2 x half> %0 to <2 x double> +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2 + +// NOFP64: %conv = fpext <2 x half> %0 to <2 x float> +// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2 +kernel void test_printf_half2(half2 arg) { + printf("%v2f", arg); +} + +#ifdef cl_khr_fp64 +// FP64-LABEL: @test_printf_double2( +// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2 +kernel void test_printf_double2(double2 arg) { + printf("%v2f", arg); +} +#endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r348084 - OpenCL: Improve vector printf warnings
Author: arsenm Date: Sat Dec 1 14:16:27 2018 New Revision: 348084 URL: http://llvm.org/viewvc/llvm-project?rev=348084&view=rev Log: OpenCL: Improve vector printf warnings The vector modifier is considered separate, so don't treat it as a conversion specifier. This is still not warning on some cases, like using a type that isn't a valid vector element. Fixes bug 39652 Added: cfe/trunk/test/SemaOpenCL/format-strings-fixit.cl Modified: cfe/trunk/include/clang/AST/FormatString.h cfe/trunk/lib/AST/FormatString.cpp cfe/trunk/lib/AST/FormatStringParsing.h cfe/trunk/lib/AST/PrintfFormatString.cpp cfe/trunk/test/SemaOpenCL/printf-format-strings.cl Modified: cfe/trunk/include/clang/AST/FormatString.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/FormatString.h?rev=348084&r1=348083&r2=348084&view=diff == --- cfe/trunk/include/clang/AST/FormatString.h (original) +++ cfe/trunk/include/clang/AST/FormatString.h Sat Dec 1 14:16:27 2018 @@ -166,8 +166,6 @@ public: ZArg, // MS extension -VArg, // OpenCL vectors - // Objective-C specific specifiers. ObjCObjArg, // '@' ObjCBeg = ObjCObjArg, @@ -305,6 +303,8 @@ public: QualType getRepresentativeType(ASTContext &C) const; + ArgType makeVectorType(ASTContext &C, unsigned NumElts) const; + std::string getRepresentativeTypeName(ASTContext &C) const; }; @@ -324,6 +324,10 @@ public: : start(nullptr),length(0), hs(valid ? 
NotSpecified : Invalid), amt(0), UsesPositionalArg(0), UsesDotPrefix(0) {} + explicit OptionalAmount(unsigned Amount) +: start(nullptr), length(0), hs(Constant), amt(Amount), +UsesPositionalArg(false), UsesDotPrefix(false) {} + bool isInvalid() const { return hs == Invalid; } @@ -381,6 +385,8 @@ protected: LengthModifier LM; OptionalAmount FieldWidth; ConversionSpecifier CS; + OptionalAmount VectorNumElts; + /// Positional arguments, an IEEE extension: /// IEEE Std 1003.1, 2004 Edition /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html @@ -388,7 +394,8 @@ protected: unsigned argIndex; public: FormatSpecifier(bool isPrintf) -: CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} +: CS(isPrintf), VectorNumElts(false), + UsesPositionalArg(false), argIndex(0) {} void setLengthModifier(LengthModifier lm) { LM = lm; @@ -416,6 +423,14 @@ public: return FieldWidth; } + void setVectorNumElts(const OptionalAmount &Amt) { +VectorNumElts = Amt; + } + + const OptionalAmount &getVectorNumElts() const { +return VectorNumElts; + } + void setFieldWidth(const OptionalAmount &Amt) { FieldWidth = Amt; } @@ -480,6 +495,9 @@ class PrintfSpecifier : public analyze_f OptionalFlag IsSensitive; // '{sensitive}' OptionalAmount Precision; StringRef MaskType; + + ArgType getScalarArgType(ASTContext &Ctx, bool IsObjCLiteral) const; + public: PrintfSpecifier() : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"), Modified: cfe/trunk/lib/AST/FormatString.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/FormatString.cpp?rev=348084&r1=348083&r2=348084&view=diff == --- cfe/trunk/lib/AST/FormatString.cpp (original) +++ cfe/trunk/lib/AST/FormatString.cpp Sat Dec 1 14:16:27 2018 @@ -179,6 +179,36 @@ clang::analyze_format_string::ParseArgPo } bool +clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, + FormatSpecifier &FS, + const char *&I, + const char *E, + const LangOptions &LO) { + if (!LO.OpenCL) +return false; + + const char 
*Start = I; + if (*I == 'v') { +++I; + +if (I == E) { + H.HandleIncompleteSpecifier(Start, E - Start); + return true; +} + +OptionalAmount NumElts = ParseAmount(I, E); +if (NumElts.getHowSpecified() != OptionalAmount::Constant) { + H.HandleIncompleteSpecifier(Start, E - Start); + return true; +} + +FS.setVectorNumElts(NumElts); + } + + return false; +} + +bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, const char *&I, const char *E, @@ -457,6 +487,14 @@ ArgType::matchesType(ASTContext &C, Qual llvm_unreachable("Invalid ArgType Kind!"); } +ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { + if (K != SpecificTy) // Won't be a valid vector element type. +return ArgType::Invalid(); + + QualType Vec = C.getExtVectorType(T, NumElts); + return ArgType(Vec, Name); +} + QualType ArgType::getRepresentativeType(A
r348809 - Update test for instcombine change
Author: arsenm Date: Mon Dec 10 15:02:40 2018 New Revision: 348809 URL: http://llvm.org/viewvc/llvm-project?rev=348809&view=rev Log: Update test for instcombine change Modified: cfe/trunk/test/CodeGen/vector.c Modified: cfe/trunk/test/CodeGen/vector.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/vector.c?rev=348809&r1=348808&r2=348809&view=diff == --- cfe/trunk/test/CodeGen/vector.c (original) +++ cfe/trunk/test/CodeGen/vector.c Mon Dec 10 15:02:40 2018 @@ -70,7 +70,7 @@ vec_int1 lax_vector_compare1(int x, vec_ } // CHECK: define i32 @lax_vector_compare1(i32 {{.*}}, i32 {{.*}}) -// CHECK: icmp eq <1 x i32> +// CHECK: icmp eq i32 typedef int vec_int2 __attribute__((vector_size(8))); vec_int2 lax_vector_compare2(long long x, vec_int2 y) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r356354 - Add testcase from bug 41079
Author: arsenm Date: Sun Mar 17 16:16:31 2019 New Revision: 356354 URL: http://llvm.org/viewvc/llvm-project?rev=356354&view=rev Log: Add testcase from bug 41079 Modified: cfe/trunk/test/CodeGen/builtin-expect.c Modified: cfe/trunk/test/CodeGen/builtin-expect.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtin-expect.c?rev=356354&r1=356353&r2=356354&view=diff == --- cfe/trunk/test/CodeGen/builtin-expect.c (original) +++ cfe/trunk/test/CodeGen/builtin-expect.c Sun Mar 17 16:16:31 2019 @@ -78,3 +78,20 @@ int switch_cond(int x) { return 0; } +int variable_expected(int stuff) { +// ALL-LABEL: define i32 @variable_expected( +// O1: call i64 @llvm.expect.i64(i64 {{%.*}}, i64 {{%.*}}) +// O0-NOT: @llvm.expect + + int res = 0; + + switch (__builtin_expect(stuff, stuff)) { + case 0: +res = 1; +break; + default: +break; + } + + return res; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r359918 - Ensure there is stack usage in stack size warning test
Author: arsenm Date: Fri May 3 12:04:14 2019 New Revision: 359918 URL: http://llvm.org/viewvc/llvm-project?rev=359918&view=rev Log: Ensure there is stack usage in stack size warning test r359906 broke this because the only stack usage was from a spill which can be avoided since the only block is a return. Modified: cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp Modified: cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp?rev=359918&r1=359917&r2=359918&view=diff == --- cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp (original) +++ cfe/trunk/test/Misc/backend-stack-frame-diagnostics-fallback.cpp Fri May 3 12:04:14 2019 @@ -14,5 +14,7 @@ namespace frameSizeThunkWarning { // CHECK: warning: stack frame size of {{[0-9]+}} bytes in function 'frameSizeThunkWarning::B::f' // CHECK: warning: stack size limit exceeded ({{[0-9]+}}) in {{[^ ]+}} - void B::f() { } + void B::f() { +volatile int x = 0; // Ensure there is stack usage. + } } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r343653 - OpenCL: Mark printf format string argument
Author: arsenm Date: Tue Oct 2 19:01:19 2018 New Revision: 343653 URL: http://llvm.org/viewvc/llvm-project?rev=343653&view=rev Log: OpenCL: Mark printf format string argument Fixes not warning on format string errors. Added: cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl Modified: cfe/trunk/lib/Headers/opencl-c.h Modified: cfe/trunk/lib/Headers/opencl-c.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/opencl-c.h?rev=343653&r1=343652&r2=343653&view=diff == --- cfe/trunk/lib/Headers/opencl-c.h (original) +++ cfe/trunk/lib/Headers/opencl-c.h Tue Oct 2 19:01:19 2018 @@ -14462,7 +14462,7 @@ half16 __ovld __cnfn shuffle2(half16 x, #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf -int printf(__constant const char* st, ...); +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); #endif // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions Added: cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl?rev=343653&view=auto == --- cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl (added) +++ cfe/trunk/test/SemaOpenCL/printf-format-string-warnings.cl Tue Oct 2 19:01:19 2018 @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -finclude-default-header + +// Make sure warnings are produced based on printf format strings. 
+ + +kernel void format_string_warnings(__constant char* arg) { + + printf("%d", arg); // expected-warning {{format specifies type 'int' but the argument has type '__constant char *'}} + + printf("not enough arguments %d %d", 4); // expected-warning {{more '%' conversions than data arguments}} + + printf("too many arguments", 4); // expected-warning {{data argument not used by format string}} +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r354624 - AMDGPU: Don't emit debugger subtarget features
Author: arsenm Date: Thu Feb 21 13:31:43 2019 New Revision: 354624 URL: http://llvm.org/viewvc/llvm-project?rev=354624&view=rev Log: AMDGPU: Don't emit debugger subtarget features Keep the flag around for compatibility. Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp cfe/trunk/test/Driver/amdgpu-features.c Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp?rev=354624&r1=354623&r2=354624&view=diff == --- cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.cpp Thu Feb 21 13:31:43 2019 @@ -38,15 +38,8 @@ void amdgpu::Linker::ConstructJob(Compil void amdgpu::getAMDGPUTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args, std::vector &Features) { - if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) { -StringRef value = dAbi->getValue(); -if (value == "1.0") { - Features.push_back("+amdgpu-debugger-insert-nops"); - Features.push_back("+amdgpu-debugger-emit-prologue"); -} else { - D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); -} - } + if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) +D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); Modified: cfe/trunk/test/Driver/amdgpu-features.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-features.c?rev=354624&r1=354623&r2=354624&view=diff == --- cfe/trunk/test/Driver/amdgpu-features.c (original) +++ cfe/trunk/test/Driver/amdgpu-features.c Thu Feb 21 13:31:43 2019 @@ -4,7 +4,7 @@ // RUN: %clang -### -target amdgcn -x cl -S -emit-llvm -mcpu=kaveri -mamdgpu-debugger-abi=1.0 %s -o - 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-MAMDGPU-DEBUGGER-ABI-1-0 %s -// CHECK-MAMDGPU-DEBUGGER-ABI-1-0: "-target-feature" "+amdgpu-debugger-insert-nops" "-target-feature" "+amdgpu-debugger-emit-prologue" +// 
CHECK-MAMDGPU-DEBUGGER-ABI-1-0: the clang compiler does not support '-mamdgpu-debugger-abi=1.0' // RUN: %clang -### -target amdgcn -mcpu=gfx700 -mcode-object-v3 %s 2>&1 | FileCheck --check-prefix=CODE-OBJECT-V3 %s // CODE-OBJECT-V3: "-target-feature" "+code-object-v3" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r363390 - AMDGPU: Use AMDGPU toolchain for other OSes
Author: arsenm Date: Fri Jun 14 06:39:57 2019 New Revision: 363390 URL: http://llvm.org/viewvc/llvm-project?rev=363390&view=rev Log: AMDGPU: Use AMDGPU toolchain for other OSes This would need more work to actually support them, but this is less wrong than the default. Modified: cfe/trunk/lib/Driver/Driver.cpp cfe/trunk/test/Driver/amdgpu-toolchain.c Modified: cfe/trunk/lib/Driver/Driver.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=363390&r1=363389&r2=363390&view=diff == --- cfe/trunk/lib/Driver/Driver.cpp (original) +++ cfe/trunk/lib/Driver/Driver.cpp Fri Jun 14 06:39:57 2019 @@ -4617,6 +4617,8 @@ const ToolChain &Driver::getToolChain(co TC = llvm::make_unique(*this, Target, Args); break; case llvm::Triple::AMDHSA: +case llvm::Triple::AMDPAL: +case llvm::Triple::Mesa3D: TC = llvm::make_unique(*this, Target, Args); break; case llvm::Triple::Win32: Modified: cfe/trunk/test/Driver/amdgpu-toolchain.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/amdgpu-toolchain.c?rev=363390&r1=363389&r2=363390&view=diff == --- cfe/trunk/test/Driver/amdgpu-toolchain.c (original) +++ cfe/trunk/test/Driver/amdgpu-toolchain.c Fri Jun 14 06:39:57 2019 @@ -1,6 +1,11 @@ // RUN: %clang -### -target amdgcn--amdhsa -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s +// RUN: %clang -### -g -target amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s +// RUN: %clang -### -target amdgcn-amd-amdpal -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s +// RUN: %clang -### -g -target amdgcn-amd-amdpal -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s +// RUN: %clang -### -target amdgcn-mesa-mesa3d -x assembler -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=AS_LINK %s +// RUN: %clang -### -g -target amdgcn-mesa-mesa3d -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s + // AS_LINK: clang{{.*}} "-cc1as" // AS_LINK: ld.lld{{.*}} "-shared" -// RUN: %clang -### -g -target 
amdgcn--amdhsa -mcpu=kaveri %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s // DWARF_VER: "-dwarf-version=5" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r363682 - AMDGPU: Disable errno by default
Author: arsenm Date: Tue Jun 18 06:59:32 2019 New Revision: 363682 URL: http://llvm.org/viewvc/llvm-project?rev=363682&view=rev Log: AMDGPU: Disable errno by default Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.h cfe/trunk/test/Driver/fast-math.c Modified: cfe/trunk/lib/Driver/ToolChains/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/AMDGPU.h?rev=363682&r1=363681&r2=363682&view=diff == --- cfe/trunk/lib/Driver/ToolChains/AMDGPU.h (original) +++ cfe/trunk/lib/Driver/ToolChains/AMDGPU.h Tue Jun 18 06:59:32 2019 @@ -57,6 +57,8 @@ public: const llvm::opt::ArgList &Args); unsigned GetDefaultDwarfVersion() const override { return 5; } bool IsIntegratedAssemblerDefault() const override { return true; } + bool IsMathErrnoDefault() const override { return false; } + llvm::opt::DerivedArgList * TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const override; Modified: cfe/trunk/test/Driver/fast-math.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/fast-math.c?rev=363682&r1=363681&r2=363682&view=diff == --- cfe/trunk/test/Driver/fast-math.c (original) +++ cfe/trunk/test/Driver/fast-math.c Tue Jun 18 06:59:32 2019 @@ -97,6 +97,12 @@ // RUN: | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s // RUN: %clang -### -target x86_64-linux-android -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s +// RUN: %clang -### -target amdgcn-amd-amdhsa -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s +// RUN: %clang -### -target amdgcn-amd-amdpal -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s +// RUN: %clang -### -target amdgcn-mesa-mesa3d -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-MATH-ERRNO %s // // Check that -ffast-math disables -fmath-errno, and -fno-fast-math merely // preserves the target default. 
Also check various flag set operations between ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r363684 - AMDGPU: Add GWS instruction builtins
Author: arsenm Date: Tue Jun 18 07:10:01 2019 New Revision: 363684 URL: http://llvm.org/viewvc/llvm-project?rev=363684&view=rev Log: AMDGPU: Add GWS instruction builtins Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363684&r1=363683&r2=363684&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Jun 18 07:10:01 2019 @@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v", BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n") BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n") BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") +BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n") +BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n") // FIXME: Need to disallow constant address space. BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363684&r1=363683&r2=363684&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Tue Jun 18 07:10:01 2019 @@ -548,6 +548,18 @@ kernel void test_ds_consume_lds(global i *out = __builtin_amdgcn_ds_consume(ptr); } +// CHECK-LABEL: @test_gws_init( +// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id) +kernel void test_gws_init(uint value, uint id) { + __builtin_amdgcn_ds_gws_init(value, id); +} + +// CHECK-LABEL: @test_gws_barrier( +// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id) +kernel void test_gws_barrier(uint value, uint id) { + __builtin_amdgcn_ds_gws_barrier(value, id); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind 
readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r363871 - Reapply "r363684: AMDGPU: Add GWS instruction builtins"
Author: arsenm Date: Wed Jun 19 12:55:49 2019 New Revision: 363871 URL: http://llvm.org/viewvc/llvm-project?rev=363871&view=rev Log: Reapply "r363684: AMDGPU: Add GWS instruction builtins" Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363871&r1=363870&r2=363871&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Wed Jun 19 12:55:49 2019 @@ -45,6 +45,8 @@ BUILTIN(__builtin_amdgcn_s_barrier, "v", BUILTIN(__builtin_amdgcn_wave_barrier, "v", "n") BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n") BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") +BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n") +BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n") // FIXME: Need to disallow constant address space. BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363871&r1=363870&r2=363871&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Wed Jun 19 12:55:49 2019 @@ -548,6 +548,18 @@ kernel void test_ds_consume_lds(global i *out = __builtin_amdgcn_ds_consume(ptr); } +// CHECK-LABEL: @test_gws_init( +// CHECK: call void @llvm.amdgcn.ds.gws.init(i32 %value, i32 %id) +kernel void test_gws_init(uint value, uint id) { + __builtin_amdgcn_ds_gws_init(value, id); +} + +// CHECK-LABEL: @test_gws_barrier( +// CHECK: call void @llvm.amdgcn.ds.gws.barrier(i32 %value, i32 %id) +kernel void test_gws_barrier(uint value, uint id) { + __builtin_amdgcn_ds_gws_barrier(value, id); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] 
= { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r366286 - AMDGPU: Add some missing builtins
Author: arsenm Date: Tue Jul 16 17:01:03 2019 New Revision: 366286 URL: http://llvm.org/viewvc/llvm-project?rev=366286&view=rev Log: AMDGPU: Add some missing builtins Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=366286&r1=366285&r2=366286&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Jul 16 17:01:03 2019 @@ -108,6 +108,16 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") +BUILTIN(__builtin_amdgcn_alignbit, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") //===--===// // CI+ only builtins. @@ -163,6 +173,13 @@ TARGET_BUILTIN(__builtin_amdgcn_sdot8, " TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts") //===--===// +// GFX10+ only builtins. 
+//===--===// +TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts") + +//===--===// // Special builtins. //===--===// BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=366286&r1=366285&r2=366286&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Jul 16 17:01:03 2019 @@ -12679,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuilti case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp8: +return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector Args; @@ -12744,6 +12746,10 @@ Value *CodeGenFunction::EmitAMDGPUBuilti return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_ubfe: +return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + case AMDGPU::BI__builtin_amdgcn_sbfe: +return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: Added: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl?rev=366286&view=auto == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl Tue Jul 16 17:01:03 2019 @@ -0,0 +1,24 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple 
amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s + +typedef unsigned int uint; + +// CHECK-LABEL: @test_permlane16( +// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true) +void test_permlane16(global uint* out, uint a, uint b, uint c, uint d)
r367431 - AMDGPU: Add missing builtin declarations
Author: arsenm Date: Wed Jul 31 07:03:05 2019 New Revision: 367431 URL: http://llvm.org/viewvc/llvm-project?rev=367431&view=rev Log: AMDGPU: Add missing builtin declarations Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=367431&r1=367430&r2=367431&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Wed Jul 31 07:03:05 2019 @@ -118,6 +118,13 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LUiLUiUiLUi", "nc") +BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc") +BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc") //===--===// // CI+ only builtins. 
Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=367431&r1=367430&r2=367431&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Wed Jul 31 07:03:05 2019 @@ -9,6 +9,7 @@ typedef unsigned short ushort; typedef half __attribute__((ext_vector_type(2))) half2; typedef short __attribute__((ext_vector_type(2))) short2; typedef ushort __attribute__((ext_vector_type(2))) ushort2; +typedef uint __attribute__((ext_vector_type(4))) uint4; // CHECK-LABEL: @test_div_scale_f64 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) @@ -654,6 +655,48 @@ kernel void test_cvt_pk_u8_f32(global ui *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2); } +// CHECK-LABEL: @test_sad_u8( +// CHECK: tail call i32 @llvm.amdgcn.sad.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_u8(src0, src1, src2); +} + +// CHECK-LABEL: test_msad_u8( +// CHECK: call i32 @llvm.amdgcn.msad.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_msad_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_msad_u8(src0, src1, src2); +} + +// CHECK-LABEL: test_sad_hi_u8( +// CHECK: call i32 @llvm.amdgcn.sad.hi.u8(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_hi_u8(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_hi_u8(src0, src1, src2); +} + +// CHECK-LABEL: @test_sad_u16( +// CHECK: call i32 @llvm.amdgcn.sad.u16(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sad_u16(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sad_u16(src0, src1, src2); +} + +// CHECK-LABEL: @test_qsad_pk_u16_u8( +// CHECK: call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2) +kernel void test_qsad_pk_u16_u8(global ulong* 
out, ulong src0, uint src1, ulong src2) { + *out = __builtin_amdgcn_qsad_pk_u16_u8(src0, src1, src2); +} + +// CHECK-LABEL: @test_mqsad_pk_u16_u8( +// CHECK: call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src0, i32 %src1, i64 %src2) +kernel void test_mqsad_pk_u16_u8(global ulong* out, ulong src0, uint src1, ulong src2) { + *out = __builtin_amdgcn_mqsad_pk_u16_u8(src0, src1, src2); +} + +// CHECK-LABEL: test_mqsad_u32_u8( +// CHECK: call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src0, i32 %src1, <4 x i32> %src2) +kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 src2) { + *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r367973 - Builtins: Start adding half versions of math builtins
Author: arsenm Date: Mon Aug 5 20:28:37 2019 New Revision: 367973 URL: http://llvm.org/viewvc/llvm-project?rev=367973&view=rev Log: Builtins: Start adding half versions of math builtins The implementation of the OpenCL builtin library currently uses 2 different hacks to get to the corresponding IR intrinsics from the source. This will allow removal of those. This is the set that is currently used (minus a few vector ones). Added: cfe/trunk/test/CodeGenOpenCL/builtins-f16.cl Modified: cfe/trunk/include/clang/Basic/Builtins.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/include/clang/Basic/Builtins.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=367973&r1=367972&r2=367973&view=diff == --- cfe/trunk/include/clang/Basic/Builtins.def (original) +++ cfe/trunk/include/clang/Basic/Builtins.def Mon Aug 5 20:28:37 2019 @@ -113,14 +113,17 @@ BUILTIN(__builtin_atan2l, "LdLdLd", "Fne BUILTIN(__builtin_abs , "ii" , "ncF") BUILTIN(__builtin_copysign, "ddd", "ncF") BUILTIN(__builtin_copysignf, "fff", "ncF") +BUILTIN(__builtin_copysignf16, "hhh", "ncF") BUILTIN(__builtin_copysignl, "LdLdLd", "ncF") BUILTIN(__builtin_copysignf128, "LLdLLdLLd", "ncF") BUILTIN(__builtin_fabs , "dd" , "ncF") BUILTIN(__builtin_fabsf, "ff" , "ncF") BUILTIN(__builtin_fabsl, "LdLd", "ncF") +BUILTIN(__builtin_fabsf16, "hh" , "ncF") BUILTIN(__builtin_fabsf128, "LLdLLd", "ncF") BUILTIN(__builtin_fmod , "ddd" , "Fne") BUILTIN(__builtin_fmodf, "fff" , "Fne") +BUILTIN(__builtin_fmodf16, "hhh" , "Fne") BUILTIN(__builtin_fmodl, "LdLdLd", "Fne") BUILTIN(__builtin_frexp , "ddi*" , "Fn") BUILTIN(__builtin_frexpf, "ffi*" , "Fn") @@ -154,6 +157,7 @@ BUILTIN(__builtin_powif, "ffi" , "Fnc") BUILTIN(__builtin_powil, "LdLdi", "Fnc") BUILTIN(__builtin_pow , "ddd" , "Fne") BUILTIN(__builtin_powf, "fff" , "Fne") +BUILTIN(__builtin_powf16, "hhh" , "Fne") BUILTIN(__builtin_powl, "LdLdLd", "Fne") // Standard unary libc/libm functions with double/float/long double variants: 
@@ -180,9 +184,11 @@ BUILTIN(__builtin_cbrtf, "ff", "Fnc") BUILTIN(__builtin_cbrtl, "LdLd", "Fnc") BUILTIN(__builtin_ceil , "dd" , "Fnc") BUILTIN(__builtin_ceilf, "ff" , "Fnc") +BUILTIN(__builtin_ceilf16, "hh" , "Fnc") BUILTIN(__builtin_ceill, "LdLd", "Fnc") BUILTIN(__builtin_cos , "dd" , "Fne") BUILTIN(__builtin_cosf, "ff" , "Fne") +BUILTIN(__builtin_cosf16, "hh" , "Fne") BUILTIN(__builtin_cosh , "dd" , "Fne") BUILTIN(__builtin_coshf, "ff" , "Fne") BUILTIN(__builtin_coshl, "LdLd", "Fne") @@ -195,9 +201,11 @@ BUILTIN(__builtin_erfcf, "ff", "Fne") BUILTIN(__builtin_erfcl, "LdLd", "Fne") BUILTIN(__builtin_exp , "dd" , "Fne") BUILTIN(__builtin_expf, "ff" , "Fne") +BUILTIN(__builtin_expf16, "hh" , "Fne") BUILTIN(__builtin_expl, "LdLd", "Fne") BUILTIN(__builtin_exp2 , "dd" , "Fne") BUILTIN(__builtin_exp2f, "ff" , "Fne") +BUILTIN(__builtin_exp2f16, "hh" , "Fne") BUILTIN(__builtin_exp2l, "LdLd", "Fne") BUILTIN(__builtin_expm1 , "dd", "Fne") BUILTIN(__builtin_expm1f, "ff", "Fne") @@ -207,15 +215,19 @@ BUILTIN(__builtin_fdimf, "fff", "Fne") BUILTIN(__builtin_fdiml, "LdLdLd", "Fne") BUILTIN(__builtin_floor , "dd" , "Fnc") BUILTIN(__builtin_floorf, "ff" , "Fnc") +BUILTIN(__builtin_floorf16, "hh" , "Fnc") BUILTIN(__builtin_floorl, "LdLd", "Fnc") BUILTIN(__builtin_fma, "dddd", "Fne") BUILTIN(__builtin_fmaf, "ffff", "Fne") +BUILTIN(__builtin_fmaf16, "hhhh", "Fne") BUILTIN(__builtin_fmal, "LdLdLdLd", "Fne") BUILTIN(__builtin_fmax, "ddd", "Fnc") BUILTIN(__builtin_fmaxf, "fff", "Fnc") +BUILTIN(__builtin_fmaxf16, "hhh", "Fnc") BUILTIN(__builtin_fmaxl, "LdLdLd", "Fnc") BUILTIN(__builtin_fmin, "ddd", "Fnc") BUILTIN(__builtin_fminf, "fff", "Fnc") +BUILTIN(__builtin_fminf16, "hhh", "Fnc") BUILTIN(__builtin_fminl, "LdLdLd", "Fnc") BUILTIN(__builtin_hypot , "ddd" , "Fne") BUILTIN(__builtin_hypotf, "fff" , "Fne") @@ -235,17 +247,20 @@ BUILTIN(__builtin_llroundl, "LLiLd", "Fn BUILTIN(__builtin_log , "dd" , "Fne") BUILTIN(__builtin_log10 , "dd" , "Fne") BUILTIN(__builtin_log10f, "ff" , "Fne") 
+BUILTIN(__builtin_log10f16, "hh" , "Fne") BUILTIN(__builtin_log10l, "LdLd", "Fne") BUILTIN(__builtin_log1p , "dd" , "Fne") BUILTIN(__builtin_log1pf, "ff" , "Fne") BUILTIN(__builtin_log1pl, "LdLd", "Fne") BUILTIN(__builtin_log2, "dd" , "Fne") BUILTIN(__builtin_log2f, "ff" , "Fne") +BUILTIN(__builtin_log2f16, "hh" , "Fne") BUILTIN(__builtin_log2l, "LdLd" , "Fne") BUILTIN(__builtin_logb , "dd", "Fne") BUILTIN(__builtin_logbf, "ff", "Fne") BUILTIN(__builtin_logbl, "LdLd", "Fne") BUILTIN(__builtin_logf, "ff" , "Fne") +BUILTIN(__builtin_logf16, "hh" , "Fne") BUILTIN(__builtin_logl, "LdLd", "Fne") BUILTIN(__builtin_lrint , "Lid", "Fne") BUILTIN(__builtin_lrintf, "Lif", "Fne") @@ -270,9 +285,11 @@ BUILTIN(__builtin_remquof, "fffi*", "Fn" BUILTIN(__builtin_remquol, "LdLdLdi*", "Fn")
r363986 - AMDGPU: Add DS GWS sema builtins
Author: arsenm Date: Thu Jun 20 14:33:57 2019 New Revision: 363986 URL: http://llvm.org/viewvc/llvm-project?rev=363986&view=rev Log: AMDGPU: Add DS GWS sema builtins Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=363986&r1=363985&r2=363986&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Thu Jun 20 14:33:57 2019 @@ -47,6 +47,9 @@ BUILTIN(__builtin_amdgcn_s_dcache_inv, " BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n") BUILTIN(__builtin_amdgcn_ds_gws_init, "vUiUi", "n") BUILTIN(__builtin_amdgcn_ds_gws_barrier, "vUiUi", "n") +BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n") +BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n") +BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n") // FIXME: Need to disallow constant address space. BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") @@ -108,6 +111,7 @@ BUILTIN(__builtin_amdgcn_ds_consume, "ii //===--===// TARGET_BUILTIN(__builtin_amdgcn_s_dcache_inv_vol, "v", "n", "ci-insts") TARGET_BUILTIN(__builtin_amdgcn_buffer_wbinvl1_vol, "v", "n", "ci-insts") +TARGET_BUILTIN(__builtin_amdgcn_ds_gws_sema_release_all, "vUi", "n", "ci-insts") //===--===// // Interpolation builtins. 
Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=363986&r1=363985&r2=363986&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Thu Jun 20 14:33:57 2019 @@ -3,6 +3,8 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s +typedef unsigned int uint; + // CHECK-LABEL: @test_s_dcache_inv_vol // CHECK: call void @llvm.amdgcn.s.dcache.inv.vol( void test_s_dcache_inv_vol() @@ -17,3 +19,9 @@ void test_buffer_wbinvl1_vol() __builtin_amdgcn_buffer_wbinvl1_vol(); } +// CHECK-LABEL: @test_gws_sema_release_all( +// CHECK: call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %id) +void test_gws_sema_release_all(uint id) +{ + __builtin_amdgcn_ds_gws_sema_release_all(id); +} Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=363986&r1=363985&r2=363986&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Thu Jun 20 14:33:57 2019 @@ -560,6 +560,24 @@ kernel void test_gws_barrier(uint value, __builtin_amdgcn_ds_gws_barrier(value, id); } +// CHECK-LABEL: @test_gws_sema_v( +// CHECK: call void @llvm.amdgcn.ds.gws.sema.v(i32 %id) +kernel void test_gws_sema_v(uint id) { + __builtin_amdgcn_ds_gws_sema_v(id); +} + +// CHECK-LABEL: @test_gws_sema_br( +// CHECK: call void @llvm.amdgcn.ds.gws.sema.br(i32 %value, i32 %id) +kernel void test_gws_sema_br(uint value, uint id) { + __builtin_amdgcn_ds_gws_sema_br(value, id); +} + +// CHECK-LABEL: @test_gws_sema_p( +// CHECK: call void @llvm.amdgcn.ds.gws.sema.p(i32 %id) +kernel void test_gws_sema_p(uint id) { + 
__builtin_amdgcn_ds_gws_sema_p(id); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl?rev=363986&r1=363985&r2=363986&view=diff == --- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl (original) +++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error-ci.cl Thu Jun 20 14:33:57 2019 @@ -1,8 +1,9 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s -void test_ci_biltins() +void test_ci_builtins() { __builtin_amdgcn_s_dcache_inv_vol(); // expected-error {{'__builtin_amdgcn_s_dcache_inv_vol' needs target feature ci-insts}} __builtin_amdgcn_buffer_wbinvl1_vol(); // expected-error {{'__builtin_am
r364123 - AMDGPU: Fix target builtins for gfx10
Author: arsenm Date: Fri Jun 21 18:30:00 2019 New Revision: 364123 URL: http://llvm.org/viewvc/llvm-project?rev=364123&view=rev Log: AMDGPU: Fix target builtins for gfx10 This wasn't setting some of the features from older generations. Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=364123&r1=364122&r2=364123&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Fri Jun 21 18:30:00 2019 @@ -144,8 +144,10 @@ bool AMDGPUTargetInfo::initFeatureMap( LLVM_FALLTHROUGH; case GK_GFX1010: Features["dl-insts"] = true; + Features["ci-insts"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; + Features["gfx8-insts"] = true; Features["gfx9-insts"] = true; Features["gfx10-insts"] = true; Features["s-memrealtime"] = true; Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=364123&r1=364122&r2=364123&view=diff == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Fri Jun 21 18:30:00 2019 @@ -15,9 +15,9 @@ // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" // GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1010: 
"target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime" -// GFX1011: "target-features"="+16-bit-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime" -// GFX1012: "target-features"="+16-bit-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx9-insts,+s-memrealtime" +// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" // GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime" // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl?rev=364123&r1=364122&r2=364123&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-ci.cl Fri Jun 21 18:30:00 2019 @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S 
-emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s typedef unsigned int uint; Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl?rev=364123&r1=364122&r2=364123&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err-clamp.cl Fri Jun 21 18:30:00 2019 @@ -1,6 +1,7 @
r364251 - AMDGPU: Fix missing declaration for mbcnt builtins
Author: arsenm Date: Mon Jun 24 16:34:06 2019 New Revision: 364251 URL: http://llvm.org/viewvc/llvm-project?rev=364251&view=rev Log: AMDGPU: Fix missing declaration for mbcnt builtins Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=364251&r1=364250&r2=364251&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Jun 24 16:34:06 2019 @@ -33,6 +33,9 @@ BUILTIN(__builtin_amdgcn_workitem_id_x, BUILTIN(__builtin_amdgcn_workitem_id_y, "Ui", "nc") BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc") +BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc") +BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc") + //===--===// // Instruction builtins. //===--===// Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl?rev=364251&r1=364250&r2=364251&view=diff == --- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl Mon Jun 24 16:34:06 2019 @@ -578,6 +578,18 @@ kernel void test_gws_sema_p(uint id) { __builtin_amdgcn_ds_gws_sema_p(id); } +// CHECK-LABEL: @test_mbcnt_lo( +// CHECK: call i32 @llvm.amdgcn.mbcnt.lo(i32 %src0, i32 %src1) +kernel void test_mbcnt_lo(global uint* out, uint src0, uint src1) { + *out = __builtin_amdgcn_mbcnt_lo(src0, src1); +} + +// CHECK-LABEL: @test_mbcnt_hi( +// CHECK: call i32 @llvm.amdgcn.mbcnt.hi(i32 %src0, i32 %src1) +kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) { + *out = __builtin_amdgcn_mbcnt_hi(src0, src1); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { 
convergent } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r331216 - AMDGPU: Add Vega12 and Vega20
Author: arsenm Date: Mon Apr 30 12:08:27 2018 New Revision: 331216 URL: http://llvm.org/viewvc/llvm-project?rev=331216&view=rev Log: AMDGPU: Add Vega12 and Vega20 Changes by Matt Arsenault and Konstantin Zhuravlyov Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def cfe/trunk/lib/Basic/Targets/AMDGPU.cpp cfe/trunk/lib/Basic/Targets/AMDGPU.h cfe/trunk/test/Driver/amdgpu-macros.cl cfe/trunk/test/Driver/amdgpu-mcpu.cl Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=331216&r1=331215&r2=331216&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Mon Apr 30 12:08:27 2018 @@ -121,6 +121,18 @@ TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") //===--===// +// Deep learning builtins. +//===--===// + +TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hf", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSi", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUi", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSi", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUi", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSi", "nc", "dl-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUi", "nc", "dl-insts") + +//===--===// // Special builtins. 
//===--===// BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=331216&r1=331215&r2=331216&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Apr 30 12:08:27 2018 @@ -133,6 +133,10 @@ bool AMDGPUTargetInfo::initFeatureMap( CPU = "gfx600"; switch (parseAMDGCNName(CPU).Kind) { +case GK_GFX906: + Features["dl-insts"] = true; + LLVM_FALLTHROUGH; +case GK_GFX904: case GK_GFX902: case GK_GFX900: Features["gfx9-insts"] = true; Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=331216&r1=331215&r2=331216&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Mon Apr 30 12:08:27 2018 @@ -78,9 +78,11 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg GK_GFX810, GK_GFX900, GK_GFX902, +GK_GFX904, +GK_GFX906, GK_AMDGCN_FIRST = GK_GFX600, -GK_AMDGCN_LAST = GK_GFX902, +GK_AMDGCN_LAST = GK_GFX906, }; struct GPUInfo { @@ -127,7 +129,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false}, {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false}, }; - static constexpr GPUInfo AMDGCNGPUs[30] = { + static constexpr GPUInfo AMDGCNGPUs[32] = { // Name CanonicalKindHas HasHasHas Has //Name FMAF Fast LDEXPF FP64 Fast // FMAFFMA @@ -161,6 +163,8 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTarg {{"stoney"},{"gfx810"}, GK_GFX810, true, false, true, true, true}, {{"gfx900"},{"gfx900"}, GK_GFX900, true, true, true, true, true}, {{"gfx902"},{"gfx902"}, GK_GFX900, true, true, true, true, true}, +{{"gfx904"},{"gfx904"}, GK_GFX904, true, true, true, true, true}, +{{"gfx906"},{"gfx906"}, GK_GFX906, true, true, true, true, true}, }; static GPUInfo parseR600Name(StringRef Name); Added: 
cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl?rev=331216&view=auto == --- cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl (added) +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-features.cl Mon Apr 30 12:08:27 2018 @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target + +// Check that appropriate features are defined for every support
r336676 - Update test for backend error message change
Author: arsenm Date: Tue Jul 10 07:03:50 2018 New Revision: 336676 URL: http://llvm.org/viewvc/llvm-project?rev=336676&view=rev Log: Update test for backend error message change Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/backend-unsupported-error.ll?rev=336676&r1=336675&r2=336676&view=diff == --- cfe/trunk/test/CodeGen/backend-unsupported-error.ll (original) +++ cfe/trunk/test/CodeGen/backend-unsupported-error.ll Tue Jul 10 07:03:50 2018 @@ -3,7 +3,7 @@ ; This is to check that backend errors for unsupported features are formatted correctly -; CHECK: error: test.c:2:20: in function bar i32 (): unsupported call to function foo.2 +; CHECK: error: test.c:2:20: in function bar i32 (): unsupported call to function foo target triple = "r600-unknown-unknown" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r336681 - AMDGPU: Try to fix test again
Author: arsenm Date: Tue Jul 10 07:47:31 2018 New Revision: 336681 URL: http://llvm.org/viewvc/llvm-project?rev=336681&view=rev Log: AMDGPU: Try to fix test again Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll Modified: cfe/trunk/test/CodeGen/backend-unsupported-error.ll URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/backend-unsupported-error.ll?rev=336681&r1=336680&r2=336681&view=diff == --- cfe/trunk/test/CodeGen/backend-unsupported-error.ll (original) +++ cfe/trunk/test/CodeGen/backend-unsupported-error.ll Tue Jul 10 07:47:31 2018 @@ -21,7 +21,7 @@ entry: ret i32 %call, !dbg !15 } -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind noinline "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 40ab8ae - OpenMP: Add helper function for convergent runtime calls
Author: Matt Arsenault Date: 2019-10-27T21:26:55-07:00 New Revision: 40ab8ae9fb70f1550815bf0f867148b5101a4f66 URL: https://github.com/llvm/llvm-project/commit/40ab8ae9fb70f1550815bf0f867148b5101a4f66 DIFF: https://github.com/llvm/llvm-project/commit/40ab8ae9fb70f1550815bf0f867148b5101a4f66.diff LOG: OpenMP: Add helper function for convergent runtime calls Most of the functions emitted here should probably be convergent, but only barriers are currently marked. Introduce this helper before adding convergent to more functions. Added: Modified: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/CodeGen/CodeGenModule.h Removed: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 708260429f68..910992e76d0a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1799,9 +1799,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); -RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); -cast(RTLFn.getCallee()) -->addFnAttr(llvm::Attribute::Convergent); +RTLFn = +CGM.CreateConvergentRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; } case OMPRTL__kmpc_barrier_simple_spmd: { @@ -1810,10 +1809,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); -RTLFn = -CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); -cast(RTLFn.getCallee()) -->addFnAttr(llvm::Attribute::Convergent); +RTLFn = CGM.CreateConvergentRuntimeFunction( +FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); break; } case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { diff --git 
a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b05a58848e82..75708d6e4966 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3332,8 +3332,14 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// type and name. llvm::FunctionCallee CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeList ExtraAttrs, - bool Local) { + llvm::AttributeList ExtraAttrs, bool Local, + bool AssumeConvergent) { + if (AssumeConvergent) { +ExtraAttrs = +ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex, +llvm::Attribute::Convergent); + } + llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 73f81adae35f..f5014c05b067 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1027,11 +1027,22 @@ class CodeGenModule : public CodeGenTypeCache { } /// Create or return a runtime function declaration with the specified type - /// and name. + /// and name. If \p AssumeConvergent is true, the call will have the + /// convergent attribute added. llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs = llvm::AttributeList(), -bool Local = false); +bool Local = false, bool AssumeConvergent = false); + + /// Create or return a runtime function declaration with the specified type + /// and name. This will automatically add the convergent attribute to the + /// function declaration. + llvm::FunctionCallee CreateConvergentRuntimeFunction( + llvm::FunctionType *Ty, StringRef Name, + llvm::AttributeList ExtraAttrs = llvm::AttributeList(), + bool Local = false) { +return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local, true); + } /// Create a new runtime global variable with the specified type and name. 
llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 56a503b - OpenMP: Add convergent to more runtime functions
Author: Matt Arsenault Date: 2019-10-27T21:26:55-07:00 New Revision: 56a503bdba9c33fce4d8fe86494cfd9c0b62c88a URL: https://github.com/llvm/llvm-project/commit/56a503bdba9c33fce4d8fe86494cfd9c0b62c88a DIFF: https://github.com/llvm/llvm-project/commit/56a503bdba9c33fce4d8fe86494cfd9c0b62c88a.diff LOG: OpenMP: Add convergent to more runtime functions Several of these other functions are probably also convergent, but these two seem obviously convergent. Added: Modified: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp clang/test/OpenMP/nvptx_parallel_codegen.cpp Removed: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 910992e76d0a..b7808e0c4f69 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1817,14 +1817,14 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { // Build int32_t __kmpc_warp_active_thread_mask(void); auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); -RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); +RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); break; } case OMPRTL_NVPTX__kmpc_syncwarp: { // Build void __kmpc_syncwarp(kmp_int32 Mask); auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); -RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp"); +RTLFn = CGM.CreateConvergentRuntimeFunction(FnTy, "__kmpc_syncwarp"); break; } } diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 32061bf7386c..2fc06350c380 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -88,7 +88,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[AWAIT_WORK:.+]] // // CHECK: [[AWAIT_WORK]] -// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#BARRIER_ATTRS:]] +// CHECK: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]] // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 @@ -318,10 +318,10 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, -// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#BARRIER_ATTRS]] +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]] // CHECK: ret void -// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#BARRIER_ATTRS]] +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]] // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}( @@ -343,7 +343,7 @@ int bar(int n){ // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}}) // CHECK: [[CC:%.+]] = alloca i32, -// CHECK: [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK: [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask(){{$}} // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK: [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: store i32 0, i32* [[CC]], @@ -363,11 +363,15 @@ int bar(int n){ // CHECK: store i32 // CHECK: call void @__kmpc_end_critical( -// CHECK: call void @__kmpc_syncwarp(i32 [[MASK]]) +// CHECK: call void @__kmpc_syncwarp(i32 [[MASK]]){{$}} // CHECK: [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1 // CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]], // CHECK: br label -// CHECK: attributes #[[#BARRIER_ATTRS]] = {{.*}} convergent {{.*}} + +// CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]] +// CHECK: declare void @__kmpc_syncwarp(i32) 
#[[#CONVERGENT:]] + +// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}} #endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 1d96dca - HIP: Try to deal with more llvm package layouts
Author: Matt Arsenault Date: 2020-05-23T13:28:24-04:00 New Revision: 1d96dca9491e3d75c11c3cd1acff5fcda8c2f613 URL: https://github.com/llvm/llvm-project/commit/1d96dca9491e3d75c11c3cd1acff5fcda8c2f613 DIFF: https://github.com/llvm/llvm-project/commit/1d96dca9491e3d75c11c3cd1acff5fcda8c2f613.diff LOG: HIP: Try to deal with more llvm package layouts The various HIP builds are all inconsistent. The default llvm install goes to ${INSTALL_PREFIX}/bin/clang, but the rocm packaging scripts move this under ${INSTALL_PREFIX}/llvm/bin/clang. Some other builds further pollute this with ${INSTALL_PREFIX}/bin/x86_64/clang. These should really be consolidated, but try to handle them for now. Added: Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 193ccad98f52..3e51bd00bae4 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -107,11 +107,18 @@ RocmInstallationDetector::RocmInstallationDetector( // the Windows-esque layout the ROCm packages use with the host architecture // subdirectory of bin. +// Strip off directory (usually bin) StringRef ParentDir = llvm::sys::path::parent_path(InstallDir); -if (ParentDir == HostTriple.getArchName()) +StringRef ParentName = llvm::sys::path::filename(ParentDir); + +// Some builds use bin/{host arch}, so go up again. +if (ParentName == "bin") { ParentDir = llvm::sys::path::parent_path(ParentDir); + ParentName = llvm::sys::path::filename(ParentDir); +} -if (ParentDir == "bin") { +if (ParentName == "llvm") { + // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(), /*StrictChecking=*/true); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libclc] cf4d4e3 - libclc: Compile with -nostdlib
Author: Matt Arsenault Date: 2020-05-28T10:41:31-04:00 New Revision: cf4d4e366a2165f0e93948f166d76ae650aecc98 URL: https://github.com/llvm/llvm-project/commit/cf4d4e366a2165f0e93948f166d76ae650aecc98 DIFF: https://github.com/llvm/llvm-project/commit/cf4d4e366a2165f0e93948f166d76ae650aecc98.diff LOG: libclc: Compile with -nostdlib This fixes a build error when compiling for amdgcn-amd-amdhsa, which defaults to trying to link bitcode libraries. Added: Modified: libclc/CMakeLists.txt Removed: diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 7b981110f6fd..9472f191fbde 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -262,7 +262,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) target_compile_definitions( builtins.link.${arch_suffix} PRIVATE "__CLC_INTERNAL" ) target_compile_options( builtins.link.${arch_suffix} PRIVATE -target - ${t} ${mcpu} -fno-builtin ) + ${t} ${mcpu} -fno-builtin -nostdlib ) set_target_properties( builtins.link.${arch_suffix} PROPERTIES LINKER_LANGUAGE CLC ) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 97f3f0b - AMDGPU: Add intrinsic for s_setreg
Author: Matt Arsenault Date: 2020-05-28T14:26:38-04:00 New Revision: 97f3f0bab0982f84745c7ac5ce8fb6b0918ff718 URL: https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718 DIFF: https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718.diff LOG: AMDGPU: Add intrinsic for s_setreg This will be more useful with fenv access implemented. Added: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenOpenCL/builtins-amdgcn.cl clang/test/SemaOpenCL/builtins-amdgcn-error.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.td llvm/lib/Target/AMDGPU/SOPInstructions.td Removed: diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 5633ccd5d744..28379142b05a 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc") // Instruction builtins. 
//===--===// BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n") +BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n") BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n") BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n") BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 8f2f149103b3..3563ad464c66 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 sr *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2); } +// CHECK-LABEL: test_s_setreg( +// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val) +kernel void test_s_setreg(uint val) { + __builtin_amdgcn_s_setreg(8193, val); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl index ad5e8776b2e8..dbe2900b600b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -139,3 +139,8 @@ void test_fence() { const char ptr[] = "workgroup"; __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}} } + +void test_s_setreg(int x, int y) { + __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}} + __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e2d8f3cb1bd6..40449304ed04 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1207,6 +1207,16 @@ def 
int_amdgcn_s_getreg : [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg>] >; +// Note this can be used to set FP environment properties that are +// unsafe to change in non-strictfp functions. The register properties +// available (and value required to access them) may differ per +// subtarget. llvm.amdgcn.s.setreg(hwmode, value) +def int_amdgcn_s_setreg : + GCCBuiltin<"__builtin_amdgcn_s_setreg">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg>] +>; + // int_amdgcn_s_getpc is provided to allow a specific style of position // independent code to determine the high part of its address when it is // known (through convention) that the code and any data of interest does diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 3b8f88271458..59f9866b93b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [// setcc def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>; -def AMDGPUSetRegOp : SDTypeProfile<0, 2, [ - SDTCisInt<0>, SDTCisInt<1> -]>; - -def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [ - SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; - def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [ SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; diff --git a/llvm/lib/Target/AMDGP
[clang] 301a6da - AMDGPU: Fix clang side null pointer value for private
Author: Matt Arsenault Date: 2020-06-02T09:23:46-04:00 New Revision: 301a6da8c24a09052e3bda10e90b450b7b39ffea URL: https://github.com/llvm/llvm-project/commit/301a6da8c24a09052e3bda10e90b450b7b39ffea DIFF: https://github.com/llvm/llvm-project/commit/301a6da8c24a09052e3bda10e90b450b7b39ffea.diff LOG: AMDGPU: Fix clang side null pointer value for private The change to fold_priv_arith looks strange to me, but this was already the untested behavior for local. Added: Modified: clang/lib/Basic/Targets/AMDGPU.h clang/test/CodeGenOpenCL/amdgpu-nullptr.cl Removed: diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 6c9060aa3f7b..e4194a881e3f 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -355,7 +355,9 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { // address space has value 0 but in private and local address space has // value ~0. uint64_t getNullPointerValue(LangAS AS) const override { -return AS == LangAS::opencl_local ? ~0 : 0; +// FIXME: Also should handle region. +return (AS == LangAS::opencl_local || AS == LangAS::opencl_private) + ? ~0 : 0; } void setAuxTarget(const TargetInfo *Aux) override; diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl index ba34d168bf79..753f7f6f4406 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -19,7 +19,7 @@ typedef struct { // Test 0 as initializer. -// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4 +// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), align 4 private char *private_p = 0; // CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4 @@ -36,7 +36,7 @@ generic char *generic_p = 0; // Test NULL as initializer. 
-// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4 +// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), align 4 private char *private_p_NULL = NULL; // CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4 @@ -57,25 +57,28 @@ generic char *generic_p_NULL = NULL; // CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32* null, align 8 generic int *fold_generic = (global int*)(generic float*)(private char*)0; -// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 addrspace(5)* null, align 4 +// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 addrspace(5)* addrspacecast (i16* null to i16 addrspace(5)*), align 4 private short *fold_priv = (private short*)(generic int*)(global void*)0; -// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 addrspace(5)* inttoptr (i32 10 to i8 addrspace(5)*), align 4 +// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 addrspace(5)* inttoptr (i32 9 to i8 addrspace(5)*), align 4 private char *fold_priv_arith = (private char*)0 + 10; -// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 14, align 4 +// CHECK: @fold_local_arith = local_unnamed_addr addrspace(1) global i8 addrspace(3)* inttoptr (i32 9 to i8 addrspace(3)*), align 4 +local char *fold_local_arith = (local char*)0 + 10; + +// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 13, align 4 int fold_int = (int)(private void*)(generic char*)(global int*)0 + 14; -// CHECK: @fold_int2 = local_unnamed_addr addrspace(1) global i32 13, align 4 +// CHECK: @fold_int2 = local_unnamed_addr addrspace(1) global i32 12, align 4 int fold_int2 = (int) ((private void*)0 + 13); -// CHECK: @fold_int3 = local_unnamed_addr addrspace(1) global i32 0, align 4 +// CHECK: @fold_int3 = local_unnamed_addr 
addrspace(1) global i32 -1, align 4 int fold_int3 = (int) ((private int*)0); -// CHECK: @fold_int4 = local_unnamed_addr addrspace(1) global i32 8, align 4 +// CHECK: @fold_int4 = local_unnamed_addr addrspace(1) global i32 7, align 4 int fold_int4 = (int) &((private int*)0)[2]; -// CHECK: @fold_int5 = local_unnamed_addr addrspace(1) global i32 4, align 4 +// CHECK: @fold_int5 = local_unnamed_addr addrspace(1) global i32 3, align 4 int fold_int5 = (int) &((private StructTy1*)0)->p2; @@ -97,12 +100,12 @@ int fold_int5_local = (int) &((local StructTy1*)0)->p2; // Test static variable initialization. -// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8 addrspace(5)* null, align 4 -// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8 addrspace(5)* null, align 4 -/
[clang] 4593e41 - AMDGPU: Teach toolchain to link rocm device libs
Author: Matt Arsenault Date: 2020-04-10T13:37:32-04:00 New Revision: 4593e4131affa84e61d7b6844be409ba46d29f11 URL: https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11 DIFF: https://github.com/llvm/llvm-project/commit/4593e4131affa84e61d7b6844be409ba46d29f11.diff LOG: AMDGPU: Teach toolchain to link rocm device libs Currently the library is separately linked, but this isn't correct to implement fast math flags correctly. Each module should get the version of the library appropriate for its combination of fast math and related flags, with the attributes propagated into its functions and internalized. HIP already maintains the list of libraries, but this is not used for OpenCL. Unfortunately, HIP uses a separate --hip-device-lib argument, despite both languages using the same bitcode library. Eventually these two searches need to be merged. An additional problem is there are 3 different locations the libraries are installed, depending on which build is used. This also needs to be consolidated (or at least the search logic needs to deal with this unnecessary complexity). 
Added: clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc clang/test/Driver/rocm-detect.cl clang/test/Driver/rocm-device-libs.cl clang/test/Driver/rocm-not-found.cl Modified: clang/include/clang/Basic/DiagnosticDriverKinds.td clang/include/clang/Driver/Options.td clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/HIP.cpp clang/lib/Driver/ToolChains/HIP.h clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl clang/test/Driver/amdgpu-visibility.cl 
llvm/include/llvm/Support/TargetParser.h llvm/lib/Support/TargetParser.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index cba59cb3b66d..b28ee88f3d87 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -55,6 +55,14 @@ def err_drv_no_cuda_installation : Error< def err_drv_no_cuda_libdevice : Error< "cannot find libdevice for %0. Provide path to different CUDA installation " "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">; + +def err_drv_no_rocm_installation : Error< + "cannot find ROCm installation. Provide its path via --rocm-path, or pass " + "-nogpulib.">; +def err_drv_no_rocm_device_lib : Error< + "cannot find device library for %0. Provide path to different ROCm installation " + "via --rocm-path, or pass -nogpulib to build without linking default libraries.">; + def err_drv_cuda_version_unsupported : Error< "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), " "but installation at %3 is %4. Use --cuda-path to specify a different CUDA " diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 02875f68ebfe..661aad49a8ee 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -608,6 +608,8 @@ def : Flag<["-"], "fno-cuda-rdc">, Alias; def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short
[clang] 1e93b3d - Disable test on windows
Author: Matt Arsenault Date: 2020-04-10T18:48:18-04:00 New Revision: 1e93b3d8a75638c2268df58e44a8738845df3ed5 URL: https://github.com/llvm/llvm-project/commit/1e93b3d8a75638c2268df58e44a8738845df3ed5 DIFF: https://github.com/llvm/llvm-project/commit/1e93b3d8a75638c2268df58e44a8738845df3ed5.diff LOG: Disable test on windows Added: Modified: clang/test/Driver/rocm-device-libs.cl Removed: diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl index 83641d24d156..77e9782f2594 100644 --- a/clang/test/Driver/rocm-device-libs.cl +++ b/clang/test/Driver/rocm-device-libs.cl @@ -1,5 +1,6 @@ // REQUIRES: clang-driver // REQUIRES: amdgpu-registered-target +// REQUIRES: !system-windows // Test flush-denormals-to-zero enabled uses oclc_daz_opt_on ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] dc89a3e - HIP: Fix handling of denormal mode
Author: Matt Arsenault Date: 2020-04-13T11:48:45-07:00 New Revision: dc89a3efb43feedec04facfa2206de011d2606e7 URL: https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7 DIFF: https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7.diff LOG: HIP: Fix handling of denormal mode I didn't realize HIP was a distinct offloading kind, so the subtarget was looking for -march, which isn't correct for HIP. We also have the possibility of different denormal defaults in the case of multiple offload targets, so we need to thread the JobAction through the target hook. Added: Modified: clang/include/clang/Driver/ToolChain.h clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/lib/Driver/ToolChains/Cuda.h clang/lib/Driver/ToolChains/Linux.cpp clang/lib/Driver/ToolChains/Linux.h clang/lib/Driver/ToolChains/PS4CPU.h clang/test/Driver/cuda-flush-denormals-to-zero.cu Removed: diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 66f22d538138..fb3cbd7f84c8 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -636,8 +636,7 @@ class ToolChain { /// environment for the given \p FPType if given. Otherwise, the default /// assumed mode for any floating point type. 
virtual llvm::DenormalMode getDefaultDenormalModeForType( - const llvm::opt::ArgList &DriverArgs, - Action::OffloadKind DeviceOffloadKind, + const llvm::opt::ArgList &DriverArgs, const JobAction &JA, const llvm::fltSemantics *FPType = nullptr) const { return llvm::DenormalMode::getIEEE(); } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index f09578f4769e..2a796f28403f 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -273,18 +273,22 @@ bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( } llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( -const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind, +const llvm::opt::ArgList &DriverArgs, const JobAction &JA, const llvm::fltSemantics *FPType) const { // Denormals should always be enabled for f16 and f64. if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) return llvm::DenormalMode::getIEEE(); - if (DeviceOffloadKind == Action::OFK_Cuda) { + if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || + JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { +auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch()); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, options::OPT_fno_cuda_flush_denormals_to_zero, - false)) + getDefaultDenormsAreZeroForTarget(Kind))) return llvm::DenormalMode::getPreserveSign(); + +return llvm::DenormalMode::getIEEE(); } const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); @@ -294,7 +298,9 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( // them all? bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || getDefaultDenormsAreZeroForTarget(Kind); - // Outputs are flushed to zero, preserving sign + + // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are + // also implicit treated as zero (DAZ). return DAZ ? 
llvm::DenormalMode::getPreserveSign() : llvm::DenormalMode::getIEEE(); } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 87a16272d624..afd71e1f595b 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -214,8 +214,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind); llvm::DenormalMode getDefaultDenormalModeForType( - const llvm::opt::ArgList &DriverArgs, - Action::OffloadKind DeviceOffloadKind, + const llvm::opt::ArgList &DriverArgs, const JobAction &JA, const llvm::fltSemantics *FPType = nullptr) const override; }; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5f9b6d813416..415ef27eee0a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2510,7 +2510,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, bool OFastEnabled, const ArgList &
[clang] 3a61245 - clang/AMDGPU: Assume denormals are enabled for the default target.
Author: Matt Arsenault Date: 2020-04-15T09:17:26-04:00 New Revision: 3a612450508b314b7a6f4db142d0c619031d760e URL: https://github.com/llvm/llvm-project/commit/3a612450508b314b7a6f4db142d0c619031d760e DIFF: https://github.com/llvm/llvm-project/commit/3a612450508b314b7a6f4db142d0c619031d760e.diff LOG: clang/AMDGPU: Assume denormals are enabled for the default target. Since the default logic was based on having fast denormal/fma features, and the default target has no features, we assumed flushing by default. This fixes incorrectly assuming flushing in builds for "generic" IR libraries. The handling for no specified --cuda-gpu-arch in HIP is kind of broken. Somewhere else forces a default target of gfx803, which does not enable denormal handling by default. We don't see this default switching here, so you'll end up with a different denormal mode depending on whether you explicitly requested gfx803, or used it by default. Added: Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/test/Driver/cl-denorms-are-zero.cl clang/test/Driver/cuda-flush-denormals-to-zero.cu Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 2a796f28403f..e8c0b824ace1 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -262,6 +262,11 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( llvm::AMDGPU::GPUKind Kind) { + + // Assume nothing without a specific target. 
+ if (Kind == llvm::AMDGPU::GK_NONE) +return false; + const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); // Default to enabling f32 denormals by default on subtargets where fma is diff --git a/clang/test/Driver/cl-denorms-are-zero.cl b/clang/test/Driver/cl-denorms-are-zero.cl index 7774c0d60da8..e3fd095e5831 100644 --- a/clang/test/Driver/cl-denorms-are-zero.cl +++ b/clang/test/Driver/cl-denorms-are-zero.cl @@ -1,20 +1,24 @@ // Slow FMAF and slow f32 denormals -// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=pitcairn %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=pitcairn %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=pitcairn %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // Fast FMAF, but slow f32 denormals -// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=tahiti %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // Fast F32 denormals, but slow FMAF -// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // Fast F32 denormals and fast FMAF -// RUN: %clang -### -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s -// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -c -mcpu=gfx900 %s 2>&1 | FileCheck 
-check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s +// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -nogpulib -c -mcpu=gfx900 %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s + +// Default target is artificial, but should assume a conservative default. +// RUN: %clang -### -target amdgcn--amdhsa -nogpulib -c %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s +// RUN: %clang -### -cl-denorms-are-zero -o - -target amdgcn--amdhsa -nogpulib -c %s 2>&1 | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s // AMDGCN-FLUSH: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign" // This should be omitted and default to ieee -// AMDGCN-DENORM-NOT: "-fdenormal-fp-math-f32" +// AMDGCN-DENORM-NOT: denormal-fp-math diff --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu index a515b5f8ca07..4a7b88823771 100644 --- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu +++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu @@ -22,6 +22,8 @@ // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudain
[clang] 9e03bde - AMDGPU: Add llvm.amdgcn.sqrt intrinsic
Author: Matt Arsenault Date: 2020-06-26T15:07:07-04:00 New Revision: 9e03bdebc17a223416d682f64ef2046b8bf0fc98 URL: https://github.com/llvm/llvm-project/commit/9e03bdebc17a223416d682f64ef2046b8bf0fc98 DIFF: https://github.com/llvm/llvm-project/commit/9e03bdebc17a223416d682f64ef2046b8bf0fc98.diff LOG: AMDGPU: Add llvm.amdgcn.sqrt intrinsic I spread the GlobalISel test into the regular one, which I've been avoiding so far. Added: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl clang/test/CodeGenOpenCL/builtins-amdgcn.cl clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/AMDGPUInstructions.td llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/VOP1Instructions.td Removed: diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 9add10c64962..60be0525fabc 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -77,6 +77,8 @@ BUILTIN(__builtin_amdgcn_trig_preop, "ddi", "nc") BUILTIN(__builtin_amdgcn_trig_preopf, "ffi", "nc") BUILTIN(__builtin_amdgcn_rcp, "dd", "nc") BUILTIN(__builtin_amdgcn_rcpf, "ff", "nc") +BUILTIN(__builtin_amdgcn_sqrt, "dd", "nc") +BUILTIN(__builtin_amdgcn_sqrtf, "ff", "nc") BUILTIN(__builtin_amdgcn_rsq, "dd", "nc") BUILTIN(__builtin_amdgcn_rsqf, "ff", "nc") BUILTIN(__builtin_amdgcn_rsq_clamp, "dd", "nc") @@ -162,6 +164,7 @@ BUILTIN(__builtin_amdgcn_interp_mov, "fUiUiUiUi", "nc") TARGET_BUILTIN(__builtin_amdgcn_div_fixuph, "", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_rcph, "hh", "nc", "16-bit-insts") +TARGET_BUILTIN(__builtin_amdgcn_sqrth, "hh", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_rsqh, "hh", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_sinh, 
"hh", "nc", "16-bit-insts") TARGET_BUILTIN(__builtin_amdgcn_cosh, "hh", "nc", "16-bit-insts") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2eef4f284271..b5c4841578c4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14702,6 +14702,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_sqrt: + case AMDGPU::BI__builtin_amdgcn_sqrtf: + case AMDGPU::BI__builtin_amdgcn_sqrth: +return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index e3e6b81271d1..5884f84ab081 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -22,6 +22,13 @@ void test_rcp_f16(global half* out, half a) *out = __builtin_amdgcn_rcph(a); } +// CHECK-LABEL: @test_sqrt_f16 +// CHECK: call half @llvm.amdgcn.sqrt.f16 +void test_sqrt_f16(global half* out, half a) +{ + *out = __builtin_amdgcn_sqrth(a); +} + // CHECK-LABEL: @test_rsq_f16 // CHECK: call half @llvm.amdgcn.rsq.f16 void test_rsq_f16(global half* out, half a) diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 3563ad464c66..56c83df6b6b4 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -116,6 +116,20 @@ void test_rcp_f64(global double* out, double a) *out = __builtin_amdgcn_rcp(a); } +// CHECK-LABEL: @test_sqrt_f32 +// CHECK: call float @llvm.amdgcn.sqrt.f32 +void test_sqrt_f32(global float* out, float a) +{ + *out = __builtin_amdgcn_sqrtf(a); +} + +// CHECK-LABEL: @test_sqrt_f64 +// CHECK: 
call double @llvm.amdgcn.sqrt.f64 +void test_sqrt_f64(global double* out, double a) +{ + *out = __builtin_amdgcn_sqrt(a); +} + // CHECK-LABEL: @test_rsq_f32 // CHECK: call float @llvm.amdgcn.rsq.f32 void test_rsq_f32(global float* out, float a) diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl index 3487b1a5a803..fdb2f3f3c981 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-f16.cl @@ -8,6 +8,7 @@ void test_f16_tahiti(global half *out, half a, half b, half c) { *out = __builtin_amdgcn_div_fixuph(a, b, c); // expected-error {{'__builtin_amdgcn_div_fixuph' needs target feature 16-bit-insts}} *out = __builtin_amdgcn_rcph(a); // expect
[clang] 5c03bee - clang: Allow backend unsupported warnings
Author: Matt Arsenault Date: 2020-04-27T12:14:51-04:00 New Revision: 5c03beefa720bddb3e3f53c595a76bce7ad50f37 URL: https://github.com/llvm/llvm-project/commit/5c03beefa720bddb3e3f53c595a76bce7ad50f37 DIFF: https://github.com/llvm/llvm-project/commit/5c03beefa720bddb3e3f53c595a76bce7ad50f37.diff LOG: clang: Allow backend unsupported warnings Currently this asserts on anything other than errors. In one workaround scenario, AMDGPU emits DiagnosticInfoUnsupported as a warning for functions that can't be correctly codegened, but should never be executed. Added: clang/test/CodeGenOpenCL/backend-unsupported-warning.ll Modified: clang/include/clang/Basic/DiagnosticFrontendKinds.td clang/lib/CodeGen/CodeGenAction.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 87fdfc89c634..9df3e79d183f 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -61,6 +61,7 @@ def note_fe_backend_invalid_loc : Note<"could " "not determine the original source location for %0:%1:%2">, BackendInfo; def err_fe_backend_unsupported : Error<"%0">, BackendInfo; +def warn_fe_backend_unsupported : Warning<"%0">, BackendInfo; def err_fe_invalid_code_complete_file : Error< "cannot locate code-completion file %0">, DefaultFatal; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 81946b194495..b8ffe343db22 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -633,8 +633,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( void BackendConsumer::UnsupportedDiagHandler( const llvm::DiagnosticInfoUnsupported &D) { - // We only support errors. - assert(D.getSeverity() == llvm::DS_Error); + // We only support warnings or errors. 
+ assert(D.getSeverity() == llvm::DS_Error || + D.getSeverity() == llvm::DS_Warning); StringRef Filename; unsigned Line, Column; @@ -652,7 +653,11 @@ void BackendConsumer::UnsupportedDiagHandler( DiagnosticPrinterRawOStream DP(MsgStream); D.print(DP); } - Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str(); + + auto DiagType = D.getSeverity() == llvm::DS_Error + ? diag::err_fe_backend_unsupported + : diag::warn_fe_backend_unsupported; + Diags.Report(Loc, DiagType) << MsgStream.str(); if (BadDebugInfo) // If we were not able to translate the file:line:col information diff --git a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll new file mode 100644 index ..82df1261c1ae --- /dev/null +++ b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll @@ -0,0 +1,30 @@ +; RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S -o - %s 2>&1 | FileCheck %s + +; Check that a DiagnosticUnsupported reported as a warning works +; correctly, and is not emitted as an error. 
+ +; CHECK: warning: test.c:2:20: in function use_lds_global_in_func i32 (): local memory global used by non-kernel function + +target triple = "amdgcn-amd-amdhsa" + +@lds = external addrspace(3) global i32, align 4 + +define i32 @use_lds_global_in_func() !dbg !5 { + %load = load i32, i32 addrspace(3)* @lds, !dbg !9 + ret i32 %load, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !6, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !7) +!7 = !{!8} +!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !DILocation(line: 2, column: 20, scope: !5) +!10 = !DILocation(line: 2, column: 13, scope: !5) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 580a9f2 - Fix test without built AMDGPU
Author: Matt Arsenault Date: 2020-04-27T13:32:00-04:00 New Revision: 580a9f2c307f8d475277f20999ad5820d171beba URL: https://github.com/llvm/llvm-project/commit/580a9f2c307f8d475277f20999ad5820d171beba DIFF: https://github.com/llvm/llvm-project/commit/580a9f2c307f8d475277f20999ad5820d171beba.diff LOG: Fix test without built AMDGPU Added: Modified: clang/test/CodeGenOpenCL/backend-unsupported-warning.ll Removed: diff --git a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll index 82df1261c1ae..597fe94885e3 100644 --- a/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll +++ b/clang/test/CodeGenOpenCL/backend-unsupported-warning.ll @@ -1,3 +1,4 @@ +; REQUIRES: amdgpu-registered-target ; RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S -o - %s 2>&1 | FileCheck %s ; Check that a DiagnosticUnsupported reported as a warning works ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 30eeb74 - clang: Use byref for aggregate kernel arguments
Author: Matt Arsenault Date: 2020-08-06T15:52:26-04:00 New Revision: 30eeb742f1d11d7a7036e3b8a3bffc1dfd252082 URL: https://github.com/llvm/llvm-project/commit/30eeb742f1d11d7a7036e3b8a3bffc1dfd252082 DIFF: https://github.com/llvm/llvm-project/commit/30eeb742f1d11d7a7036e3b8a3bffc1dfd252082.diff LOG: clang: Use byref for aggregate kernel arguments Add address space to indirect abi info and use it for kernels. Previously, indirect arguments assumed a stack passed object in the alloca address space using byval. A stack pointer is unsuitable for kernel arguments, which are passed in a separate, constant buffer with a different address space. Start using the new byref for aggregate kernel arguments. Previously these were emitted as raw struct arguments, and turned into loads in the backend. These will lower identically, although with byref you now have the option of applying an explicit alignment. In the future, a reasonable implementation would use byref for all kernel arguments (this would be a practical problem at the moment due to losing things like noalias on pointer arguments). This is mostly to avoid fighting the optimizer's treatment of aggregate load/store. SROA and instcombine both turn aggregate loads and stores into a long sequence of element loads and stores, rather than the optimizable memcpy I would expect in this situation. Now an explicit memcpy will be introduced up-front which is better understood and helps eliminate the alloca in more situations. This skips using byref in the case where HIP kernel pointer arguments in structs are promoted to global pointers. At minimum an additional patch is needed to allow coercion with indirect arguments. This also skips using it for OpenCL due to the current workaround used to support kernels calling kernels. Distinct function bodies would need to be generated up front instead of emitting an illegal call. 
Added: Modified: clang/include/clang/CodeGen/CGFunctionInfo.h clang/lib/CodeGen/CGCall.cpp clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/kernel-args.cu clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl Removed: diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h index eaf5a3d5aad7..253ef946ce15 100644 --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -44,10 +44,23 @@ class ABIArgInfo { /// but also emit a zero/sign extension attribute. Extend, -/// Indirect - Pass the argument indirectly via a hidden pointer -/// with the specified alignment (0 indicates default alignment). +/// Indirect - Pass the argument indirectly via a hidden pointer with the +/// specified alignment (0 indicates default alignment) and address space. Indirect, +/// IndirectAliased - Similar to Indirect, but the pointer may be to an +/// object that is otherwise referenced. The object is known to not be +/// modified through any other references for the duration of the call, and +/// the callee must not itself modify the object. Because C allows +/// parameter variables to be modified and guarantees that they have unique +/// addresses, the callee must defensively copy the object into a local +/// variable if it might be modified or its address might be compared. +/// Since those are uncommon, in principle this convention allows programs +/// to avoid copies in more situations. However, it may introduce *extra* +/// copies if the callee fails to prove that a copy is unnecessary and the +/// caller naturally produces an unaliased object for the argument. +IndirectAliased, + /// Ignore - Ignore the argument (treat as void). Useful for void and /// empty structs. 
Ignore, @@ -86,6 +99,7 @@ class ABIArgInfo { unsigned AllocaFieldIndex; // isInAlloca() }; Kind TheKind; + unsigned IndirectAddrSpace : 24; // isIndirect() bool PaddingInReg : 1; bool InAllocaSRet : 1;// isInAlloca() bool InAllocaIndirect : 1;// isInAlloca() @@ -97,7 +111,8 @@ class ABIArgInfo { bool SignExt : 1; // isExtend() bool canHavePaddingType() const { -return isDirect() || isExtend() || isIndirect() || isExpand(); +return isDirect() || isExtend() || isIndirect() || isIndirectAliased() || + isExpand(); } void setPaddingType(llvm::Type *T) { assert(canHavePaddingType()); @@ -112,9 +127,10 @@ class ABIArgInfo { public: ABIArgInfo(Kind K = Direct) : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K), -PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false), -IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false), -InReg(false), CanBeFlattened(false), SignExt(false) {} +IndirectAddrSpace(0), PaddingInReg(false), InAlloc
[clang] 684dc1b - Elaborate more on --rocm-path flag.
Author: Matt Arsenault Date: 2020-05-05T09:33:18-04:00 New Revision: 684dc1bebe5cb70cfd27923940f9f8cba4f13195 URL: https://github.com/llvm/llvm-project/commit/684dc1bebe5cb70cfd27923940f9f8cba4f13195 DIFF: https://github.com/llvm/llvm-project/commit/684dc1bebe5cb70cfd27923940f9f8cba4f13195.diff LOG: Elaborate more on --rocm-path flag. I'm not sure what the conventions are for this documentation. The format seems limiting. I don't see how to refer to other flags, or mark flags as deprecated. The rst I believe these generate seems to be in source, and out of date. Added: Modified: clang/include/clang/Driver/Options.td Removed: diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f9850c60f62d..66b98b1e46fa 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -609,9 +609,9 @@ def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>, HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">; def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">; def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, - HelpText<"ROCm installation path">; + HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group, - HelpText<"HIP device library path">; + HelpText<"HIP device library path. Alternative to rocm-path.">; def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group, HelpText<"HIP device library">; def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">, ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 03cb328 - clang: Cleanup usage of CreateMemCpy
Author: Matt Arsenault Date: 2020-05-08T20:57:56-04:00 New Revision: 03cb328d6f691bde88c754341ff3859d1c1ecc2f URL: https://github.com/llvm/llvm-project/commit/03cb328d6f691bde88c754341ff3859d1c1ecc2f DIFF: https://github.com/llvm/llvm-project/commit/03cb328d6f691bde88c754341ff3859d1c1ecc2f.diff LOG: clang: Cleanup usage of CreateMemCpy It handles the pointee type casts in preparation for opaque pointers. Added: Modified: clang/lib/CodeGen/CGCall.cpp Removed: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e336741d9111..32a9ba499ecb 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1262,11 +1262,9 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // Otherwise do coercion through memory. This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); - Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty); - CGF.Builder.CreateMemCpy(Casted, SrcCasted, - llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), - false); + CGF.Builder.CreateMemCpy(Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), + Src.getPointer(), Src.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize)); return CGF.Builder.CreateLoad(Tmp); } @@ -1349,11 +1347,9 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. 
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); -Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); -Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty); -CGF.Builder.CreateMemCpy(DstCasted, Casted, -llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), -false); +CGF.Builder.CreateMemCpy(Dst.getPointer(), Dst.getAlignment().getAsAlign(), + Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, DstSize)); } } @@ -2404,10 +2400,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // FIXME: We should have a common utility for generating an aggregate // copy. CharUnits Size = getContext().getTypeSizeInChars(Ty); - auto SizeVal = llvm::ConstantInt::get(IntPtrTy, Size.getQuantity()); - Address Dst = Builder.CreateBitCast(AlignedTemp, Int8PtrTy); - Address Src = Builder.CreateBitCast(ParamAddr, Int8PtrTy); - Builder.CreateMemCpy(Dst, Src, SizeVal, false); + Builder.CreateMemCpy( + AlignedTemp.getPointer(), AlignedTemp.getAlignment().getAsAlign(), + ParamAddr.getPointer(), ParamAddr.getAlignment().getAsAlign(), + llvm::ConstantInt::get(IntPtrTy, Size.getQuantity())); V = AlignedTemp; } ArgVals.push_back(ParamValue::forIndirect(V)); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] a881dc1 - Fix typo
Author: Matt Arsenault Date: 2020-05-09T16:00:17-04:00 New Revision: a881dc1103579926f039e81c0d25626ff8a582a9 URL: https://github.com/llvm/llvm-project/commit/a881dc1103579926f039e81c0d25626ff8a582a9 DIFF: https://github.com/llvm/llvm-project/commit/a881dc1103579926f039e81c0d25626ff8a582a9.diff LOG: Fix typo Added: Modified: clang/lib/CodeGen/TargetInfo.cpp Removed: diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 1e164d3fe2b0..bc5c1682853b 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -8353,7 +8353,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo { EltTys, (STy->getName() + ".coerce").str(), STy->isPacked()); return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked()); } -// Arrary types. +// Array types. if (auto ATy = dyn_cast(Ty)) { auto T = ATy->getElementType(); auto NT = coerceKernelArgumentType(T, FromAS, ToAS); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 123bee6 - AMDGPU: Search for new ROCm bitcode library structure
Author: Matt Arsenault Date: 2020-05-12T09:41:07-04:00 New Revision: 123bee602a260150ff55c74287f583a67ee78f36 URL: https://github.com/llvm/llvm-project/commit/123bee602a260150ff55c74287f583a67ee78f36 DIFF: https://github.com/llvm/llvm-project/commit/123bee602a260150ff55c74287f583a67ee78f36.diff LOG: AMDGPU: Search for new ROCm bitcode library structure The current install situation is a mess, but I'm working on fixing it. Search for the target layout instead of one of the N options that exist today. Added: clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/hip.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/ockl.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_daz_opt_off.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_daz_opt_on.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_finite_only_off.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_finite_only_on.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1010.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1011.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_1012.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_803.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_isa_version_900.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_unsafe_math_off.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_unsafe_math_on.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_wavefrontsize64_off.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/oclc_wavefrontsize64_on.bc clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/ocml.bc 
clang/test/Driver/Inputs/rocm-device-libs/amdgcn/bitcode/opencl.bc Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/test/Driver/rocm-device-libs.cl Removed: clang/test/Driver/Inputs/rocm-device-libs/lib/hip.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1010.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1011.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_1012.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_off.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_wavefrontsize64_on.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index e8c0b824ace1..345a24b10018 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -65,30 +65,18 @@ RocmInstallationDetector::RocmInstallationDetector( if (InstallPath.empty() || !D.getVFS().exists(InstallPath)) continue; -// FIXME: The install path situation is a real 
mess. - -// For a cmake install, these are placed directly in -// ${INSTALL_PREFIX}/lib - -// In the separate OpenCL builds, the bitcode libraries are placed in -// ${OPENCL_ROOT}/lib/x86_64/bitcode/* - -// For the rocm installed packages, these are placed at -// /opt/rocm/opencl/lib/x86_64/bitcode - -// An additional copy is installed, in scattered locations between -// /opt/rocm/hcc/rocdl/oclc -// /opt/rocm/hcc/rocdl/ockl -// /opt/rocm/hcc/rocdl/lib +// The install path situation in old versions of ROCm is a real mess, and +// use a different install layout. Multiple copies of the device libraries +// exist for each frontend project, and differ depending on which build +// system produced the pac
[clang] 14e1845 - HIP: Merge builtin library handling
Author: Matt Arsenault Date: 2020-05-12T09:50:22-04:00 New Revision: 14e184571139ba4c7347ea547074c6d9ec9c7b14 URL: https://github.com/llvm/llvm-project/commit/14e184571139ba4c7347ea547074c6d9ec9c7b14 DIFF: https://github.com/llvm/llvm-project/commit/14e184571139ba4c7347ea547074c6d9ec9c7b14.diff LOG: HIP: Merge builtin library handling Merge with the new --rocm-path handling used for OpenCL. This looks for a usable set of device libraries upfront, rather than giving a generic "no such file or directory error". If any of the required bitcode libraries are missing, this will now produce a "cannot find ROCm installation." error. This differs from the existing hip specific flags by pointing to a rocm root install instead of a single directory with bitcode files. This tries to maintain compatibility with the existing --hip-device-lib and --hip-device-lib-path flags, as well as the HIP_DEVICE_LIB_PATH environment variable, or at least the range of uses with testcases. The existing range of uses and behavior doesn't entirely make sense to me, so some of the untested edge cases change behavior. Currently the two path forms seem to have the double purpose of a search path for an arbitrary --hip-device-lib, and for finding the stock set of libraries. Since the stock set of libraries This also changes the behavior when multiple paths are specified, and only takes the last one (and the environment variable only handles a single path). If --hip-device-lib is used, it now only treats --hip-device-lib-path as the search path for it, and does not attempt to find the rocm installation. If not, --hip-device-lib-path and the environment variable are used as the directory to search instead of the rocm root based path. This should also automatically fix handling of the options to use wave64. 
Added: Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/AMDGPU.h clang/lib/Driver/ToolChains/HIP.cpp clang/test/Driver/hip-device-libs.hip clang/test/Driver/rocm-device-libs.cl Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 345a24b10018..fd81fec5f452 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -21,6 +21,67 @@ using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; +void RocmInstallationDetector::scanLibDevicePath() { + assert(!LibDevicePath.empty()); + + const StringRef Suffix(".bc"); + + std::error_code EC; + for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { +StringRef FilePath = LI->path(); +StringRef FileName = llvm::sys::path::filename(FilePath); +if (!FileName.endswith(Suffix)) + continue; + +StringRef BaseName = FileName.drop_back(Suffix.size()); + +if (BaseName == "ocml") { + OCML = FilePath; +} else if (BaseName == "ockl") { + OCKL = FilePath; +} else if (BaseName == "opencl") { + OpenCL = FilePath; +} else if (BaseName == "hip") { + HIP = FilePath; +} else if (BaseName == "oclc_finite_only_off") { + FiniteOnly.Off = FilePath; +} else if (BaseName == "oclc_finite_only_on") { + FiniteOnly.On = FilePath; +} else if (BaseName == "oclc_daz_opt_on") { + DenormalsAreZero.On = FilePath; +} else if (BaseName == "oclc_daz_opt_off") { + DenormalsAreZero.Off = FilePath; +} else if (BaseName == "oclc_correctly_rounded_sqrt_on") { + CorrectlyRoundedSqrt.On = FilePath; +} else if (BaseName == "oclc_correctly_rounded_sqrt_off") { + CorrectlyRoundedSqrt.Off = FilePath; +} else if (BaseName == "oclc_unsafe_math_on") { + UnsafeMath.On = FilePath; +} else if (BaseName == "oclc_unsafe_math_off") { + UnsafeMath.Off = FilePath; +} else if (BaseName == "oclc_wavefrontsize64_on") { + WavefrontSize64.On = FilePath; +} else if (BaseName == 
"oclc_wavefrontsize64_off") { + WavefrontSize64.Off = FilePath; +} else { + // Process all bitcode filenames that look like + // ocl_isa_version_XXX.amdgcn.bc + const StringRef DeviceLibPrefix = "oclc_isa_version_"; + if (!BaseName.startswith(DeviceLibPrefix)) +continue; + + StringRef IsaVersionNumber = +BaseName.drop_front(DeviceLibPrefix.size()); + + llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; + SmallString<8> Tmp; + LibDeviceMap.insert( +std::make_pair(GfxName.toStringRef(Tmp), FilePath.str())); +} + } +} + RocmInstallationDetector::RocmInstallationDetector( const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args) @@ -60,6 +121,27 @@ RocmInstallationDetector::RocmInstallationDetector( bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib); + assert(LibDevicePath.empty
[clang] 235fb7d - AMDGPU/OpenCL: Accept -nostdlib in place of -nogpulib
Author: Matt Arsenault Date: 2020-05-14T12:33:31-04:00 New Revision: 235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d URL: https://github.com/llvm/llvm-project/commit/235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d DIFF: https://github.com/llvm/llvm-project/commit/235fb7dc24b1cf7034dfc76bb853ffb4ac5dec5d.diff LOG: AMDGPU/OpenCL: Accept -nostdlib in place of -nogpulib -nogpulib makes sense when there is a host (where -nostdlib would apply) and offload target. Accept nostdlib when there is no offload target as an alias. Added: clang/test/Driver/rocm-detect.hip Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/test/Driver/rocm-not-found.cl Removed: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index fd81fec5f452..193ccad98f52 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -361,6 +361,12 @@ void ROCMToolChain::addClangTargetOptions( AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); + // For the OpenCL case where there is no offload target, accept -nostdlib to + // disable bitcode linking. + if (DeviceOffloadingKind == Action::OFK_None && + DriverArgs.hasArg(options::OPT_nostdlib)) +return; + if (DriverArgs.hasArg(options::OPT_nogpulib)) return; diff --git a/clang/test/Driver/rocm-detect.hip b/clang/test/Driver/rocm-detect.hip new file mode 100644 index ..82ed7138098a --- /dev/null +++ b/clang/test/Driver/rocm-detect.hip @@ -0,0 +1,27 @@ +// REQUIRES: clang-driver +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// Make sure the appropriate device specific library is available. + +// We don't include every target in the test directory, so just pick a valid +// target not included in the test. 
+ +// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s + +// Should not interpret -nostdlib as disabling offload libraries. +// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 -nostdlib \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s + + +// RUN: %clang -### -v -target x86_64-linux-gnu --cuda-gpu-arch=gfx902 -nogpulib \ +// RUN: --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s + + +// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide path to different ROCm installation via --rocm-path, or pass -nogpulib to build without linking default libraries. + +// NODEFAULTLIBS-NOT: error: cannot find diff --git a/clang/test/Driver/rocm-not-found.cl b/clang/test/Driver/rocm-not-found.cl index 49b6c7efcf99..8ecc4b0ef105 100644 --- a/clang/test/Driver/rocm-not-found.cl +++ b/clang/test/Driver/rocm-not-found.cl @@ -7,5 +7,7 @@ // RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR // ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib. +// Accept nogpulib or nostdlib for OpenCL. // RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck %s --check-prefix OK +// RUN: %clang -### -nostdlib --rocm-path=%s/no-rocm-there %s 2>&1 | FileCheck %s --check-prefix OK // OK-NOT: cannot find ROCm installation. ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25343: [OpenCL] Mark group functions as noduplicate in opencl-c.h
arsenm added a comment. These should be convergent instead https://reviews.llvm.org/D25343 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r319735 - AMDGPU: Don't add fp64 feature to r600 subtargets
Author: arsenm Date: Mon Dec 4 19:51:26 2017 New Revision: 319735 URL: http://llvm.org/viewvc/llvm-project?rev=319735&view=rev Log: AMDGPU: Don't add fp64 feature to r600 subtargets Should fix test after r319709 Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=319735&r1=319734&r2=319735&view=diff == --- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original) +++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Dec 4 19:51:26 2017 @@ -194,7 +194,7 @@ bool AMDGPUTargetInfo::initFeatureMap( case GK_R700_DOUBLE_OPS: case GK_EVERGREEN_DOUBLE_OPS: case GK_CAYMAN: - Features["fp64"] = true; + // TODO: Add fp64 when implemented. break; case GK_NONE: return false; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang-tools-extra] [clang] [llvm] [InferAddressSpaces] Fix constant replace to avoid modifying other functions (PR #70611)
@@ -334,6 +335,15 @@ template<> struct simplify_type { } }; +template <> struct GraphTraits { arsenm wrote: If we allowed bitcasts between address spaces with the same size, we could drop addrspacecast constantexprs altogether https://github.com/llvm/llvm-project/pull/70611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)
@@ -17632,8 +17632,20 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector Args; -for (unsigned I = 0; I != E->getNumArgs(); ++I) - Args.push_back(EmitScalarExpr(E->getArg(I))); +for (unsigned I = 0; I != E->getNumArgs(); ++I) { + llvm::Value *Arg = EmitScalarExpr(E->getArg(I)); + // Except first two input operands, all other are imm operands for dpp + // intrinsic. + if (llvm::is_contained(std::initializer_list{2, 3, 4, 5}, I)) { arsenm wrote: Yes, the others can be fixed separately. https://github.com/llvm/llvm-project/pull/71139 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)
@@ -17632,8 +17632,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector Args; -for (unsigned I = 0; I != E->getNumArgs(); ++I) - Args.push_back(EmitScalarExpr(E->getArg(I))); +// Find out if any arguments are required to be integer constant arsenm wrote: Can you factor the existing generic code into a helper function? This is mostly copy paste of the default builtin path https://github.com/llvm/llvm-project/pull/71139 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
https://github.com/arsenm requested changes to this pull request. https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
@@ -52,6 +52,11 @@ static cl::opt ForceEmitZeroFlag( cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden); +static cl::opt +PreciseMemOpFlag("amdgpu-precise-memory-op", + cl::desc("Emit s_waitcnt 0 after each memory operation"), + cl::init(false)); + arsenm wrote: I think this should be fused into an enum flag with the existing waitcnt flag. https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, return HasVMemLoad && UsesVgprLoadedOutside; } +bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) { + bool Modified = false; + + for (auto &MBB : MF) { arsenm wrote: Should try to integrate with the rest of the logic instead of adding a separate pass over the function https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU] const-fold imm operands of amdgcn_update_dpp intrinsic (PR #71139)
https://github.com/arsenm approved this pull request. Could probably golf this down for more sharing with the default path but this is a start https://github.com/llvm/llvm-project/pull/71139 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Revert "Revert "[AMDGPU] const-fold imm operands of (PR #71669)
@@ -0,0 +1,48 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang --offload-arch=gfx906 -S -o - -emit-llvm --cuda-device-only -nogpuinc -nogpulib\ arsenm wrote: This should use %clang_cc1, look at some of the other CodeGen* test run lines for examples https://github.com/llvm/llvm-project/pull/71669 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Revert "Revert "[AMDGPU] const-fold imm operands of (PR #71669)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/71669 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[openmp] [clang] [llvm] [OpenMP] Rework handling of global ctor/dtors in OpenMP (PR #71739)
@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, if (D.isNoDestroy(CGM.getContext())) return; + // OpenMP offloading supports C++ constructors and destructors but we do not + // always have 'atexit' available. Instead lower these to use the LLVM global + // destructors which we can handle directly in the runtime. + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice && + !D.isStaticLocal() && + (CGM.getTriple().isAMDGPU() || CGM.getTriple().isNVPTX())) arsenm wrote: Oh look, it's both of my favorite patterns. Can you refine this into something better than language X | language Y and AMDGPU || PTX https://github.com/llvm/llvm-project/pull/71739 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang-tools-extra] [clang] [InferAddressSpaces] Fix constant replace to avoid modifying other functions (PR #70611)
https://github.com/arsenm approved this pull request. I think it would be better if we could eliminate ConstantExpr addrspacecasts from the IR altogether, which would avoid most of the complexity here. I would also somewhat prefer to push this DFS into a helper function, but can live with it inline as-is https://github.com/llvm/llvm-project/pull/70611 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CodeGen] Implement post-opt linking option for builtin bitocdes (PR #69371)
@@ -113,7 +120,7 @@ class EmitAssemblyHelper { const CodeGenOptions &CodeGenOpts; const clang::TargetOptions &TargetOpts; const LangOptions &LangOpts; - Module *TheModule; + llvm::Module *TheModule; arsenm wrote: Why did this suddenly need qualification? https://github.com/llvm/llvm-project/pull/69371 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CodeGen] Implement post-opt linking option for builtin bitocdes (PR #69371)
@@ -98,6 +100,11 @@ extern cl::opt PrintPipelinePasses; static cl::opt ClSanitizeOnOptimizerEarlyEP( "sanitizer-early-opt-ep", cl::Optional, cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false)); + +// Re-link builtin bitcodes after optimization +static cl::opt ClRelinkBuiltinBitcodePostop( +"relink-builtin-bitcode-postop", cl::Optional, +cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false)); arsenm wrote: Not a proper flag? Where/how is -mlink-builtin-bitcode defined? https://github.com/llvm/llvm-project/pull/69371 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, return HasVMemLoad && UsesVgprLoadedOutside; } +bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) { + bool Modified = false; + + for (auto &MBB : MF) { arsenm wrote: I think it makes it harder to reason about the pass as a whole to have it as a totally separate phase https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU] Emit a waitcnt instruction after each memory instruction (PR #68932)
@@ -1809,6 +1816,23 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, return HasVMemLoad && UsesVgprLoadedOutside; } +bool SIInsertWaitcnts::insertWaitcntAfterMemOp(MachineFunction &MF) { + bool Modified = false; + + for (auto &MBB : MF) { arsenm wrote: Plus I think the two separate, but closely related cl::opts is confusing https://github.com/llvm/llvm-project/pull/68932 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [openmp] [OpenMP] Rework handling of global ctor/dtors in OpenMP (PR #71739)
@@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, if (D.isNoDestroy(CGM.getContext())) return; + // OpenMP offloading supports C++ constructors and destructors but we do not + // always have 'atexit' available. Instead lower these to use the LLVM global + // destructors which we can handle directly in the runtime. + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsTargetDevice && + !D.isStaticLocal() && + (CGM.getTriple().isAMDGPU() || CGM.getTriple().isNVPTX())) arsenm wrote: Would also just hide this in a target/lang predicate that lists these https://github.com/llvm/llvm-project/pull/71739 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
@@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,DEFAULT %s +// RUN: %clang_cc1 -O3 -triple %itanium_abi_triple -freciprocal-math -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,FLAG %s + +float base(float a, float b, float c) { +// CHECK-LABEL: _Z4basefff +// FLAG: %[[A:.+]] = fdiv arcp float %b, %c +// FLAG: %[[M:.+]] = fdiv arcp float %[[A]], %b +// FLAG-NEXT: fadd arcp float %[[M]], %c + +// DEFAULT: %[[A:.+]] = fdiv float %b, %c +// DEFAULT: %[[M:.+]] = fdiv float %[[A]], %b +// DEFAULT-NEXT: fadd float %[[M]], %c + a = b / c; + return a / b + c; +} + +// Simple case +float fp_recip_simple(float a, float b, float c) { +// CHECK-LABEL: _Z15fp_recip_simplefff +// CHECK: %[[A:.+]] = fdiv arcp float %b, %c +// CHECK: %[[M:.+]] = fdiv arcp float %[[A]], %b +// CHECK-NEXT: fadd arcp float %[[M]], %c +#pragma clang fp reciprocal(on) + a = b / c; + return a / b + c; +} + +// Test interaction with -freciprocal-math +float fp_recip_disable(float a, float b, float c) { +// CHECK-LABEL: _Z16fp_recip_disablefff +// CHECK: %[[A:.+]] = fdiv float %b, %c +// CHECK: %[[M:.+]] = fdiv float %[[A]], %b +// CHECK-NEXT: fadd float %[[M]], %c +#pragma clang fp reciprocal(off) + a = b / c; + return a / b + c; +} + +float fp_recip_with_reassoc_simple(float a, float b, float c) { +// CHECK-LABEL: _Z28fp_recip_with_reassoc_simplefff +// CHECK: %[[A:.+]] = fmul reassoc arcp float %b, %c +// CHECK: %[[M:.+]] = fdiv reassoc arcp float %b, %[[A]] +// CHECK-NEXT: fadd reassoc arcp float %[[M]], %c +#pragma clang fp reciprocal(on) reassociate(on) + a = b / c; + return a / b + c; +} + +// arcp pragma should only apply to its scope +float fp_recip_scoped(float a, float b, float c) { + // CHECK-LABEL: _Z15fp_recip_scopedfff + // DEFAULT: %[[M:.+]] = fdiv float %a, %b + // DEFAULT-NEXT: fadd float %[[M]], %c + // FLAG: %[[M:.+]] = fdiv arcp float %a, %b + // FLAG-NEXT: fadd arcp float %[[M]], %c + { +#pragma clang fp 
reciprocal(on) + } + return a / b + c; +} + +// arcp pragma should apply to templates as well +class Foo {}; +Foo operator+(Foo, Foo); +template +T template_recip(T a, T b, T c) { +#pragma clang fp reciprocal(on) + return ((a / b) - c) + c; +} + +float fp_recip_template(float a, float b, float c) { + // CHECK-LABEL: _Z17fp_recip_templatefff + // CHECK: %[[A1:.+]] = fdiv arcp float %a, %b + // CHECK-NEXT: %[[A2:.+]] = fsub arcp float %[[A1]], %c + // CHECK-NEXT: fadd arcp float %[[A2]], %c + return template_recip(a, b, c); +} + +// File Scoping should work across functions +#pragma clang fp reciprocal(on) +float fp_file_scope_on(float a, float b, float c) { + // CHECK-LABEL: _Z16fp_file_scope_onfff + // CHECK: %[[M1:.+]] = fdiv arcp float %a, %c + // CHECK-NEXT: %[[M2:.+]] = fdiv arcp float %b, %c + // CHECK-NEXT: fadd arcp float %[[M1]], %[[M2]] + return (a / c) + (b / c); +} + +// Inner pragma has precedence +float fp_file_scope_stop(float a, float b, float c) { + // CHECK-LABEL: _Z18fp_file_scope_stopfff + // CHECK: %[[A:.+]] = fdiv arcp float %a, %a + // CHECK: %[[M1:.+]] = fdiv float %[[A]], %c + // CHECK-NEXT: %[[M2:.+]] = fdiv float %b, %c + // CHECK-NEXT: fsub float %[[M1]], %[[M2]] + a = a / a; + { +#pragma clang fp reciprocal(off) +return (a / c) - (b / c); + } +} + +#pragma clang fp reciprocal(off) +float fp_recip_off(float a, float b, float c) { + // CHECK-LABEL: _Z12fp_recip_of + // CHECK: %[[D1:.+]] = fdiv float %a, %c + // CHECK-NEXT: %[[D2:.+]] = fdiv float %b, %c + // CHECK-NEXT: fadd float %[[D1]], %[[D2]] + return (a / c) + (b / c); +} + +// Takes latest flag +float fp_recip_many(float a, float b, float c) { +// CHECK-LABEL: _Z13fp_recip_manyfff +// CHECK: %[[D1:.+]] = fdiv arcp float %a, %c +// CHECK-NEXT: %[[D2:.+]] = fdiv arcp float %b, %c +// CHECK-NEXT: fadd arcp float %[[D1]], %[[D2]] +#pragma clang fp reciprocal(off) reciprocal(on) + return (a / c) + (b / c); +} + +// Pragma does not propagate through called functions +float 
helper_func(float a, float b, float c) { return a + b + c; } +float fp_recip_call_helper(float a, float b, float c) { +// CHECK-LABEL: _Z20fp_recip_call_helperfff +// CHECK: %[[S1:.+]] = fadd float %a, %b +// CHECK-NEXT: fadd float %[[S1]], %c +#pragma clang fp reciprocal(on) + return helper_func(a, b, c); +} arsenm wrote: That's already here on line 44 https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/72072 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/72072 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][CGExprScalar] Remove no-op ptr-to-ptr bitcast (NFC) (PR #72072)
@@ -2227,14 +2227,6 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value *V = CE->changesVolatileQualification() ? EmitLoadOfLValue(CE) : Visit(const_cast(E)); -if (V) { - // CK_NoOp can model a pointer qualification conversion, which can remove - // an array bound and change the IR type. - // FIXME: Once pointee types are removed from IR, remove this. - llvm::Type *T = ConvertType(DestTy); - if (T != V->getType()) -V = Builder.CreateBitCast(V, T); -} return V; arsenm wrote: Could fold this into a direct return CE->... https://github.com/llvm/llvm-project/pull/72072 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
https://github.com/arsenm commented: 128-bit division should already work, we have an IR integer division expansion for > 64-bit divides. I think moving towards getting the infrastructure to a place where we can link in compiler-rt binaries is a good thing, but I don't think we're in a position to actually enable that at this time. We still don't have everything necessary to provide object linking, which this seems to rely on https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [clang] [llvm] [HIP] support 128 bit int division (PR #71978)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[compiler-rt] [llvm] [clang] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { + // If parsing as bitcode failed, clear the error and try to parse as an + // archive. + handleAllErrors(ModuleOrErr.takeError(), + [&](const llvm::ErrorInfoBase &EIB) {}); + + Expected> BinOrErr = + llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext); + + if (!BinOrErr) { +handleAllErrors(BinOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +LinkModules.clear(); +return true; + } + + std::unique_ptr &Bin = *BinOrErr; + + if (Bin->isArchive()) { +llvm::object::Archive *Archive = +llvm::cast(Bin.get()); +Error Err = Error::success(); + +for (auto &Child : Archive->children(Err)) { + Expected ChildBufOrErr = + Child.getMemoryBufferRef(); + if (!ChildBufOrErr) { arsenm wrote: Can you add some tests for the various error cases? https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { + // If parsing as bitcode failed, clear the error and try to parse as an + // archive. 
+ handleAllErrors(ModuleOrErr.takeError(), + [&](const llvm::ErrorInfoBase &EIB) {}); + + Expected> BinOrErr = + llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext); + + if (!BinOrErr) { +handleAllErrors(BinOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +LinkModules.clear(); +return true; + } + + std::unique_ptr &Bin = *BinOrErr; + + if (Bin->isArchive()) { +llvm::object::Archive *Archive = +llvm::cast(Bin.get()); +Error Err = Error::success(); + +for (auto &Child : Archive->children(Err)) { + Expected ChildBufOrErr = + Child.getMemoryBufferRef(); + if (!ChildBufOrErr) { +handleAllErrors( +ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +continue; + } + auto ChildBuffer = llvm::MemoryBuffer::getMemBufferCopy( + ChildBufOrErr->getBuffer(), ChildBufOrErr->getBufferIdentifier()); + + if (!ChildBuffer) { +handleAllErrors( +ChildBufOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +continue; + } + + Expected> ChildModuleOrErr = + getOwningLazyBitcodeModule(std::move(ChildBuffer), *VMContext); + if (!ChildModuleOrErr) { +handleAllErrors( +ChildModuleOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +continue; + } + + LinkModules.push_back({std::move(ChildModuleOrErr.get()), arsenm wrote: Not sure you need the .get() https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { arsenm wrote: no else after continue https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [compiler-rt] [llvm] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { + // If parsing as bitcode failed, clear the error and try to parse as an + // archive. + handleAllErrors(ModuleOrErr.takeError(), + [&](const llvm::ErrorInfoBase &EIB) {}); + + Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr = + llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext); + + if (!BinOrErr) { +handleAllErrors(BinOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +LinkModules.clear(); +return true; + } + + std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr; + + if (Bin->isArchive()) { arsenm wrote: Can you split all of this out into an archive handling helper function? https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { + // If parsing as bitcode failed, clear the error and try to parse as an + // archive. + handleAllErrors(ModuleOrErr.takeError(), + [&](const llvm::ErrorInfoBase &EIB) {}); + + Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr = + llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext); + + if (!BinOrErr) { +handleAllErrors(BinOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +LinkModules.clear(); +return true; + } + + std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr; + + if (Bin->isArchive()) { +llvm::object::Archive *Archive = +llvm::cast<llvm::object::Archive>(Bin.get()); +Error Err = Error::success(); arsenm wrote: I assume this doesn't require initialization https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -596,6 +596,7 @@ static bool mustPreserveGV(const GlobalValue &GV) { if (const Function *F = dyn_cast<Function>(&GV)) return F->isDeclaration() || F->getName().startswith("__asan_") || F->getName().startswith("__sanitizer_") || + F->getName() == "__divti3" || arsenm wrote: we're stuck preserving this in the IR at all times which isn't really ideal https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -937,27 +938,105 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) { for (const CodeGenOptions::BitcodeFileToLink &F : CI.getCodeGenOpts().LinkBitcodeFiles) { -auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename); -if (!BCBuf) { + +auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename); +if (!BCBufOrErr) { CI.getDiagnostics().Report(diag::err_cannot_open_file) - << F.Filename << BCBuf.getError().message(); + << F.Filename << BCBufOrErr.getError().message(); LinkModules.clear(); return true; } +auto &BCBuf = *BCBufOrErr; + Expected<std::unique_ptr<llvm::Module>> ModuleOrErr = -getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext); -if (!ModuleOrErr) { - handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) { +getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext); + +if (ModuleOrErr) { + LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs, + F.Internalize, F.LinkFlags}); + continue; +} else { + // If parsing as bitcode failed, clear the error and try to parse as an + // archive. + handleAllErrors(ModuleOrErr.takeError(), + [&](const llvm::ErrorInfoBase &EIB) {}); + + Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr = + llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext); + + if (!BinOrErr) { +handleAllErrors(BinOrErr.takeError(), +[&](const llvm::ErrorInfoBase &EIB) { + CI.getDiagnostics().Report(diag::err_cannot_open_file) + << F.Filename << EIB.message(); +}); +LinkModules.clear(); +return true; + } + + std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr; + + if (Bin->isArchive()) { +llvm::object::Archive *Archive = +llvm::cast<llvm::object::Archive>(Bin.get()); +Error Err = Error::success(); + +for (auto &Child : Archive->children(Err)) { + Expected<llvm::MemoryBufferRef> ChildBufOrErr = + Child.getMemoryBufferRef(); + if (!ChildBufOrErr) { arsenm wrote: Also the base case, a driver test would help me see what this is actually doing https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [compiler-rt] [HIP] support 128 bit int division (PR #71978)
@@ -3630,10 +3631,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, std::vector<SDValue> Ops; Ops.push_back(Chain); + bool AddTargetGlobalAddr = true; + // Try to find the callee in the current module. + if (isa<ExternalSymbolSDNode>(Callee)) { +Callee = DAG.getSymbolFunctionGlobalAddress(Callee); +AddTargetGlobalAddr = false; + } arsenm wrote: This should be split into a separate backend only change https://github.com/llvm/llvm-project/pull/71978 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/68267 >From f09d8efdcbb5ffb9cd39d686205a120b6a82a01b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 31 Aug 2023 17:33:35 -0400 Subject: [PATCH] clang: Add pragma clang fp reciprocal Just follow along with the reassociate pragma. This allows locally setting the arcp fast math flag. Previously you could only access this through the global -freciprocal-math. --- clang/docs/LanguageExtensions.rst | 16 +++ clang/docs/ReleaseNotes.rst | 2 + .../clang/Basic/DiagnosticParseKinds.td | 3 +- .../clang/Basic/DiagnosticSemaKinds.td| 2 +- clang/include/clang/Basic/PragmaKinds.h | 8 ++ clang/include/clang/Sema/Sema.h | 5 +- clang/lib/Parse/ParsePragma.cpp | 51 --- clang/lib/Sema/SemaAttr.cpp | 18 ++- clang/test/CodeGen/fp-reciprocal-pragma.cpp | 130 ++ clang/test/Parser/pragma-fp-contract.c| 15 ++ clang/test/Parser/pragma-fp.cpp | 4 +- .../test/Sema/eval-method-with-unsafe-math.c | 32 + 12 files changed, 256 insertions(+), 30 deletions(-) create mode 100644 clang/test/CodeGen/fp-reciprocal-pragma.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 30e288f986782fd..090600275956be0 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -4617,6 +4617,22 @@ The pragma can take two values: ``on`` and ``off``. float v = t + z; } +``#pragma clang fp reciprocal`` allows control over using reciprocal +approximations in floating point expressions. When enabled, this +pragma allows the expression ``x / y`` to be approximated as ``x * +(1.0 / y)``. This pragma can be used to disable reciprocal +approximation when it is otherwise enabled for the translation unit +with the ``-freciprocal-math`` flag or other fast-math options. The +pragma can take two values: ``on`` and ``off``. + +.. 
code-block:: c++ + + float f(float x, float y) + { +// Enable floating point reciprocal approximation +#pragma clang fp reciprocal(on) +return x / y; + } ``#pragma clang fp contract`` specifies whether the compiler should contract a multiply and an addition (or subtraction) into a fused FMA diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 74358219ba9fb22..a8b68fb8c3ee486 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -218,6 +218,8 @@ Non-comprehensive list of changes in this release For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not known at compile-time and is determined at runtime. +* Added ``#pragma clang fp reciprocal``. + New Compiler Flags -- diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index de180344fcc5c74..2f3bef33f936883 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1569,12 +1569,13 @@ def note_pragma_loop_invalid_vectorize_option : Note< "vectorize_width(X, scalable) where X is an integer, or vectorize_width('fixed' or 'scalable')">; def err_pragma_fp_invalid_option : Error< - "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract', 'reassociate' or 'exceptions'">; + "%select{invalid|missing}0 option%select{ %1|}0; expected 'contract', 'reassociate', 'reciprocal', or 'exceptions'">; def err_pragma_fp_invalid_argument : Error< "unexpected argument '%0' to '#pragma clang fp %1'; expected " "%select{" "'fast' or 'on' or 'off'|" "'on' or 'off'|" + "'on' or 'off'|" "'ignore', 'maytrap' or 'strict'|" "'source', 'double' or 'extended'}2">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4614324babb1c91..19f027848b177dc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6755,7 +6755,7 
@@ def warn_floatingpoint_eq : Warning< def err_setting_eval_method_used_in_unsafe_context : Error < "%select{'#pragma clang fp eval_method'|option 'ffp-eval-method'}0 cannot be used with " - "%select{option 'fapprox-func'|option 'mreassociate'|option 'freciprocal'|option 'ffp-eval-method'|'#pragma clang fp reassociate'}1">; + "%select{option 'fapprox-func'|option 'mreassociate'|option 'freciprocal'|option 'ffp-eval-method'|'#pragma clang fp reassociate'|'#pragma clang fp reciprocal'}1">; def warn_remainder_division_by_zero : Warning< "%select{remainder|division}0 by zero is undefined">, diff --git a/clang/include/clang/Basic/PragmaKinds.h b/clang/include/clang/Basic/PragmaKinds.h index 176bbc9ac7caaec..42f049f7323d2d4 100644 --- a/clang/include/clang/Basic/PragmaKinds.h +++ b/clang/include/clang/Basic/PragmaKinds.h @@ -34,6 +34,14 @@ enum Pragm
[clang] [llvm] [flang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)
@@ -0,0 +1,31 @@ +//=== AMDGPUAddrSpace.h -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +/// \file +/// AMDGPU address space definition +/// +// +//===--===// + +#ifndef LLVM_SUPPORT_AMDGPUADDRSPACE_H +#define LLVM_SUPPORT_AMDGPUADDRSPACE_H + +namespace llvm { +namespace AMDGPU { +enum class AddrSpace { arsenm wrote: So this is still a second copy of the address space enum, another copy of which exists in AMDGPU.h (which does not use enum class, and uses different names). What's the plan to consolidate these? https://github.com/llvm/llvm-project/pull/73944 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][AMDGPU] Update amdgpu_waves_per_eu attr docs (PR #74587)
@@ -2659,8 +2659,9 @@ An error will be given if: - Specified values violate subtarget specifications; - Specified values are not compatible with values provided through other attributes; - - The AMDGPU target backend is unable to create machine code that can meet the -request. + +The AMDGPU target backend will emit a warning whenever it is unable to arsenm wrote: This looks indented differently, without the list - as the previous comment did https://github.com/llvm/llvm-project/pull/74587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][AMDGPU] Update amdgpu_waves_per_eu attr docs (PR #74587)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/74587 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits