[clang] [Clang] add option --offload-jobs=N (PR #135229)

2025-04-10 Thread Artem Belevich via cfe-commits
@@ -1233,6 +1233,10 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">, Flags<[HelpHidden]>, HelpText<"Compression level for offload device binaries (HIP only)">; +def offload_jobs_EQ : Joined<["--"], "offload-jobs=">, + HelpText<"Set the

[clang] [llvm] [mlir] [NVPTX] Add support for Shared Cluster Memory address space. (PR #135444)

2025-04-15 Thread Artem Belevich via cfe-commits
@@ -982,8 +982,9 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { case ADDRESS_SPACE_SHARED: Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared; break; -case ADDRESS_SPACE_DSHARED: - Opc = TM.is64Bit() ? NVPTX::cvta_dshared_64 :

[clang] [llvm] [NVPTX] Cleanup and document nvvm.fabs intrinsics, adding f16 support (PR #135644)

2025-04-15 Thread Artem Belevich via cfe-commits
@@ -1034,6 +1034,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_abs_bf16

[clang] [CUDA][HIP] Add a __device__ version of std::__glibcxx_assert_fail() (PR #136133)

2025-04-21 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,35 @@ +// libstdc++ uses the non-constexpr function std::__glibcxx_assert_fail() +// to trigger compilation errors when the __glibcxx_assert(cond) macro +// is used in a constexpr context. +// Compilation fails when using code from the libstdc++ (such as std::array) on

[clang] [clang][ARM][AArch64] Define intrinsics guarded by __has_builtin on all platforms (PR #128222)

2025-04-21 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/128222 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [mlir] [NVPTX] Add support for Shared Cluster Memory address space. (PR #135444)

2025-04-21 Thread Artem Belevich via cfe-commits
@@ -25,6 +25,7 @@ enum AddressSpace : unsigned { ADDRESS_SPACE_CONST = 4, ADDRESS_SPACE_LOCAL = 5, ADDRESS_SPACE_TENSOR = 6, + ADDRESS_SPACE_SHARED_CLUSTER = 7, Artem-B wrote: PTX docs say: ``` If no sub-qualifier is specified with the .shared state sp

[clang] clang/OpenCL: Add baseline test showing broken codegen (PR #138862)

2025-05-07 Thread Artem Belevich via cfe-commits
@@ -109,3 +109,48 @@ void func2(void) { void func3(void) { float a[16][1] = {{0.}}; } + +// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT:[[PLONG:%.*]] = alloca i64, al

[clang] clang/OpenCL: Add baseline test showing broken codegen (PR #138862)

2025-05-07 Thread Artem Belevich via cfe-commits
@@ -109,3 +109,48 @@ void func2(void) { void func3(void) { float a[16][1] = {{0.}}; } + +// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca( +// CL12-SAME: ) #[[ATTR0]] { +// CL12-NEXT: [[ENTRY:.*:]] +// CL12-NEXT:[[PLONG:%.*]] = alloca i64, al

[clang] [CUDA] Add support for sm101 and sm120 target architectures (PR #127187)

2025-02-18 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. LGTM. Do you need help merging the patch? https://github.com/llvm/llvm-project/pull/127187 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/

[clang] [CUDA] Add support for sm101 and sm120 target architectures (PR #127187)

2025-02-18 Thread Artem Belevich via cfe-commits
@@ -300,6 +306,10 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1"); if (GPU == OffloadArch::SM_100a) Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1"); +if (GPU == OffloadArch::SM_

[clang] [CUDA][HIP] fix virtual dtor host/device attr (PR #128926)

2025-02-28 Thread Artem Belevich via cfe-commits
@@ -32,22 +32,24 @@ public: template class B; } -// The implicit host/device attrs of virtual dtor B::~B() is inferred to -// have implicit device attr since dtors of its members and parent classes can -// be executed on device. This causes a diagnostic since B::~B() must -//

[clang] [CUDA][HIP] fix virtual dtor host/device attr (PR #128926)

2025-02-28 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/128926 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] fix virtual dtor host/device attr (PR #128926)

2025-02-28 Thread Artem Belevich via cfe-commits
@@ -32,22 +32,24 @@ public: template class B; } -// The implicit host/device attrs of virtual dtor B::~B() is inferred to -// have implicit device attr since dtors of its members and parent classes can -// be executed on device. This causes a diagnostic since B::~B() must -//

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/129117 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/129117 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. + void VisitCalledDestructors(CXXDestructorDecl *DD) { +const CXXRecordDecl *RD = DD->getParent(); + +// Visi

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. LGTM functionally, some style nits. https://github.com/llvm/llvm-project/pull/129117 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-comm

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. + void VisitCalledDestructors(CXXDestructorDecl *DD) { +const CXXRecordDecl *RD = DD->getParent(); + +// Visi

[clang] [CUDA][HIP] check dtor in deferred diag (PR #129117)

2025-02-27 Thread Artem Belevich via cfe-commits
@@ -1798,6 +1798,62 @@ class DeferredDiagnosticsEmitter Inherited::visitUsedDecl(Loc, D); } + // Visitor member and parent dtors called by this dtor. + void VisitCalledDestructors(CXXDestructorDecl *DD) { +const CXXRecordDecl *RD = DD->getParent(); + +// Visi

[clang] [Clang] Fix 'gpuintrin.h' match when included with no arch set (PR #129927)

2025-03-05 Thread Artem Belevich via cfe-commits
@@ -179,8 +179,10 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { // Newer targets can use the dedicated CUDA support. - if (__CUDA_ARCH__ >=

[clang] [Clang] Fix 'gpuintrin.h' match when included with no arch set (PR #129927)

2025-03-05 Thread Artem Belevich via cfe-commits
@@ -179,8 +179,10 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { // Newer targets can use the dedicated CUDA support. - if (__CUDA_ARCH__ >=

[clang] [Clang] Add __has_target_builtin macro (PR #126324)

2025-02-12 Thread Artem Belevich via cfe-commits
@@ -96,6 +100,47 @@ the header file to conditionally make a function constexpr whenever the constant evaluation of the corresponding builtin (for example, ``std::fmax`` calls ``__builtin_fmax``) is supported in Clang. +``__has_target_builtin`` + +

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } +static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, +GlobalValue *GV, const Metadata *V) { + Function *F = cast(GV); + + constexpr

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const Function &F, : std::nullopt; } +static SmallVector getFnAttrParsedVector(const Function &F, + StringRef Attr) { + SmallVector V; + auto &Ctx

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -506,24 +507,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. // If none of Reqntid* is specified, don't output reqnti

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } +static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, +GlobalValue *GV, const Metadata *V) { + Function *F = cast(GV); + + constexpr

[clang] [Clang][Docs] Document -Xarch_ better (PR #127890)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -932,9 +932,18 @@ def W_Joined : Joined<["-"], "W">, Group, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass to the static analyzer">, MetaVarName<"">, Group; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, - HelpText<"Pass to th

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -5059,6 +5092,18 @@ bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, cast(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV)); return true; } + if (K.consume_front("maxntid") && (K == "x" || K == "y" || K == "z")) { Artem-B wrot

[clang] [Clang][Docs] Document -Xarch_ better (PR #127890)

2025-02-19 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/127890 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][Docs] Document -Xarch_ better (PR #127890)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -932,9 +932,18 @@ def W_Joined : Joined<["-"], "W">, Group, def Xanalyzer : Separate<["-"], "Xanalyzer">, HelpText<"Pass to the static analyzer">, MetaVarName<"">, Group; -def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, - HelpText<"Pass to th

[clang] [Clang][Docs] Document -Xarch_ better (PR #127890)

2025-02-19 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/127890 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -196,6 +198,36 @@ static std::optional getFnAttrParsedInt(const Function &F, : std::nullopt; } +static SmallVector getFnAttrParsedVector(const Function &F, + StringRef Attr) { + SmallVector V; + auto &Ctx

[clang] [llvm] [mlir] [NVPTX] Convert vector function nvvm.annotations to attributes (PR #127736)

2025-02-19 Thread Artem Belevich via cfe-commits
@@ -5021,6 +5024,36 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } +static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, +GlobalValue *GV, const Metadata *V) { + Function *F = cast(GV); + + constexpr

[clang] [Clang] Add __has_target_builtin macro (PR #126324)

2025-02-21 Thread Artem Belevich via cfe-commits
Artem-B wrote: > > I'd vote for fixing the CUDA on Arm case that failed in the meantime, then > > make a decision as to whether or not we should go back to `__has_builtin` > > only returning the current compilation target once that's gone. > > +1, if we can get away with it. +1 to that. http

[clang] [Clang][ARM] Only try to redefine builtins for non-CUDA (PR #128222)

2025-02-21 Thread Artem Belevich via cfe-commits
@@ -27,6 +27,8 @@ extern "C" { #endif +#if !defined(__CUDA_ARCH__) + Artem-B wrote: Unfortunately, this will be observable to any code that happens to need those builtins, and they are actually not provided by the compiler (i.e. `__has_builtin(__wfi)` would

[clang] [Clang][ARM] Only try to redefine builtins for non-CUDA (PR #128222)

2025-02-21 Thread Artem Belevich via cfe-commits
@@ -27,6 +27,8 @@ extern "C" { #endif +#if !defined(__CUDA_ARCH__) + Artem-B wrote: I'm actually wondering if the header needs `__has_builtin()`... They are coming from the same compiler, and, if they are missing, they are declared in terms of `__builtin*()

[clang] [Clang][ARM] Only try to redefine builtins for non-CUDA (PR #128222)

2025-02-21 Thread Artem Belevich via cfe-commits
@@ -27,6 +27,8 @@ extern "C" { #endif +#if !defined(__CUDA_ARCH__) + Artem-B wrote: ``` #if __CUDA_ARCH__ // Make sure the declarations here are in sync with the functions provided under #else. #else #endif ``` https://github.com/llvm/llvm-project/pull/1

[clang] [CUDA][HIP] fix virtual dtor host/device attr (PR #128926)

2025-02-26 Thread Artem Belevich via cfe-commits
Artem-B wrote: Would it make sense to separate into separate patches deferred diag fix (1a/1b on your list) from inference of destructor attributes? Deferred diags fix is straightforward, but destructor attribute inference may need a longer discussion. https://github.com/llvm/llvm-project/pul

[clang] Reland [HIP] fix host min/max in header (PR #133590)

2025-03-31 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/133590 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] cuda clang: Add support for CUDA surfaces (PR #132883)

2025-04-04 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,3329 @@ +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -O3 -o - %s -emit-llvm | FileCheck %s +#include "../Headers/Inputs/include/cuda.h" ---

[clang] [llvm] [NVPTX] Add intrinsics for cvt .f6x2 and .ue8m0x2 variants (PR #134345)

2025-04-04 Thread Artem Belevich via cfe-commits
@@ -596,6 +605,28 @@ def __nvvm_e4m3x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh def __nvvm_e5m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM_89, PTX81>; def __nvvm_e5m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>

[clang] [llvm] [NVPTX] Add intrinsics for cvt .f6x2 and .ue8m0x2 variants (PR #134345)

2025-04-04 Thread Artem Belevich via cfe-commits
@@ -1021,6 +1036,174 @@ __device__ void nvvm_cvt_sm89() { __nvvm_e5m2x2_to_f16x2_rn(0x4c4c); // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e5m2x2.to.f16x2.rn.relu(i16 19532) __nvvm_e5m2x2_to_f16x2_rn_relu(0x4c4c); + + // CHECK_PTX81_SM89: call i32 @llvm.nvvm.f2tf32.rn

[clang] [llvm] [NVPTX] Add intrinsics for cvt .f6x2 and .ue8m0x2 variants (PR #134345)

2025-04-04 Thread Artem Belevich via cfe-commits
@@ -703,6 +703,53 @@ let hasSideEffects = false in { defm CVT_to_tf32_rz_satf : CVT_TO_TF32<"rz.satfinite", [hasPTX<86>, hasSM<100>]>; defm CVT_to_tf32_rn_relu_satf : CVT_TO_TF32<"rn.relu.satfinite", [hasPTX<86>, hasSM<100>]>; defm CVT_to_tf32_rz_relu_satf : CVT_TO_TF

[clang] [llvm] [NVPTX] Add intrinsics for cvt .f6x2 and .ue8m0x2 variants (PR #134345)

2025-04-04 Thread Artem Belevich via cfe-commits
@@ -580,6 +580,15 @@ def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>; def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>; +def __nvvm_f2tf32_

[clang] cuda clang: Clean up test dependency for CUDA surfaces (PR #134459)

2025-04-04 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/134459 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [NVPTX] Add intrinsics and clang builtins for conversions of f4x2 type (PR #139244)

2025-05-09 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/139244 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA] fix wrapper cmath header to match #136101 (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
Artem-B wrote: @jhuber6 @ldionne One concern I have for this change is that it will break folks who will use older libc++ with the new Clang + wrapper headers. Is older libc++ expected to work with non-matching clang version? If the expectation is that libc++ and clang are from the same versio

[clang] [CUDA] Remove obsolete GPU-side __constexpr_* wrappers. (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B closed https://github.com/llvm/llvm-project/pull/139164 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA] fix wrapper cmath header to match #136101 (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
Artem-B wrote: > Right now this checks for `libc++` less than 14. Is that still relevant > following that change? That's a very good point. Looks like those `__constexpr_fmin/fmax` are gone now and we do not heed them any more. https://github.com/llvm/llvm-project/pull/139164

[clang] [CUDA] fix wrapper cmath header to match #136101 (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/139164 >From a1d60feed11174b9d2106b57ee15ff6d9bc56fa4 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Thu, 8 May 2025 14:43:47 -0700 Subject: [PATCH] [CUDA] remove obsolete GPU-side __constexpr* wrappers libc++ no

[clang] [CUDA] Remove obsolete GPU-side __constexpr_* wrappers. (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/139164 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA] Remove obsolete GPU-side __constexpr_* wrappers. (PR #139164)

2025-05-12 Thread Artem Belevich via cfe-commits
Artem-B wrote: No wrappers -- no problems. :-) https://github.com/llvm/llvm-project/pull/139164 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] Fix host/device attribute of builtin (PR #138162)

2025-05-07 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/138162 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HIP] change default offload archs (PR #139281)

2025-05-09 Thread Artem Belevich via cfe-commits
Artem-B wrote: @cgmb > I would suggest that we should either (a) change the default GPU target to > native and make the failure to detect the user’s GPU into a hard compiler > error, or (b) change the default GPU target to SPIR-V so that it works on > every machine. The thing is that the se

[clang] [HIP] change default offload archs (PR #139281)

2025-05-09 Thread Artem Belevich via cfe-commits
Artem-B wrote: @jhuber6 do you think can we use `native` instead? I think it would be a somewhat better option here. If we have to choose a GPU variant by default, we may as well choose the actual GPU, rather than a conditional choice between generic SPIR-V or an old GPU, which has the disadva

[clang] [llvm] [NVPTX] Add errors for incorrect CUDA addrpaces (PR #138706)

2025-05-13 Thread Artem Belevich via cfe-commits
@@ -1399,19 +1399,27 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (PTy) { O << "\t.param .u" << PTySizeInBits << " .ptr"; +bool IsCUDA = static_cast(TM).getDrvInterface() == + NVPTX::CUDA;

[clang] [llvm] [NVPTX] Add errors for incorrect CUDA addrpaces (PR #138706)

2025-05-19 Thread Artem Belevich via cfe-commits
@@ -2927,6 +2928,20 @@ void Verifier::visitFunction(const Function &F) { "Calling convention does not support varargs or " "perfect forwarding!", &F); +if (F.getCallingConv() == CallingConv::PTX_Kernel && +TT.getOS() == Triple::CUDA) {

[clang] [llvm] [NVPTX] Add errors for incorrect CUDA addrpaces (PR #138706)

2025-05-19 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/138706 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [NVPTX] Support the OpenCL generic addrspace feature by default (PR #137940)

2025-05-19 Thread Artem Belevich via cfe-commits
@@ -170,6 +170,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { Opts["cl_khr_global_int32_extended_atomics"] = true; Opts["cl_khr_local_int32_base_atomics"] = true; Opts["cl_khr_local_int32_extended_atomics"] = true; + +Opts["__opencl_c_

[clang] [llvm] [mlir] [NVPTX] Unify and extend barrier{.cta} intrinsic support (PR #140615)

2025-05-19 Thread Artem Belevich via cfe-commits
@@ -1349,6 +1349,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" || Name == "swap.lo.hi.b64") Expand = true; + else if (Name == "barrier0" || Name == "b

[clang] [llvm] [NVPTX] Add pm_event intrinsics (PR #141278)

2025-05-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/141278 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [NVPTX] Add pm_event intrinsics (PR #141278)

2025-05-27 Thread Artem Belevich via cfe-commits
@@ -177,6 +177,7 @@ let Attributes = [NoReturn] in { } let Attributes = [NoThrow] in { def __nvvm_nanosleep : NVPTXBuiltinSMAndPTX<"void(unsigned int)", SM_70, PTX63>; + def __nvvm_pm_event_mask : NVPTXBuiltin<"void(unsigned short)">; Artem-B wrote: The ar

[clang] [llvm] [NVPTX] Add pm_event intrinsics (PR #141278)

2025-05-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. Builtin signature needs a fix, but LGTM otherwise. https://github.com/llvm/llvm-project/pull/141278 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/li

[clang] [clang] Move opt level in clang toolchain to clang::ConstructJob start (PR #141036)

2025-05-27 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/141036 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] add option -gpuinc (PR #140106)

2025-05-15 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B commented: Being able to override a flag is a good thing to have, IMO. There are builds where the owner of the leaf targets do not have much control over which options are set by the "default" compilation, so they need to rely on being able to override preceding opti

[clang] [CUDA][HIP] add option -gpuinc (PR #140106)

2025-05-15 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/140106 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] add option -gpuinc (PR #140106)

2025-05-15 Thread Artem Belevich via cfe-commits
@@ -5734,6 +5734,9 @@ def nobuiltininc : Flag<["-"], "nobuiltininc">, def nogpuinc : Flag<["-"], "nogpuinc">, Group, HelpText<"Do not add include paths for CUDA/HIP and" " do not include the default CUDA/HIP wrapper headers">; +def gpuinc : Flag<["-"], "gpuinc">, Group, +

[clang] [CUDA][HIP] add option -gpuinc (PR #140106)

2025-05-15 Thread Artem Belevich via cfe-commits
@@ -5734,6 +5734,9 @@ def nobuiltininc : Flag<["-"], "nobuiltininc">, def nogpuinc : Flag<["-"], "nogpuinc">, Group, HelpText<"Do not add include paths for CUDA/HIP and" " do not include the default CUDA/HIP wrapper headers">; +def gpuinc : Flag<["-"], "gpuinc">, Group, +

[clang] [llvm] [mlir] Reland "[NVPTX] Unify and extend barrier{.cta} intrinsic support" (PR #141143)

2025-05-22 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/141143 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Revert "Add missing intrinsics to cuda headers" (PR #144755)

2025-06-18 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B created https://github.com/llvm/llvm-project/pull/144755 Reverts llvm/llvm-project#143664 as it breaks CUDA compilation. >From 2ed0932a540bb1a692fe442ab590d51674645f6c Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 18 Jun 2025 10:06:56 -0700 Subject: [PATCH

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-18 Thread Artem Belevich via cfe-commits
Artem-B wrote: It appears to be breaking CUDA tests: https://lab.llvm.org/buildbot/#/builders/69/builds/22559 I'll revert it for now and we'll try again later. ``` [29/988] Building CXX object External/CUDA/CMakeFiles/math_h-cuda-11.8-c++98-libstdc++-10.dir/math_h.cu.o FAILED: External/CUDA/

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-18 Thread Artem Belevich via cfe-commits
Artem-B wrote: It's a C++-11 feature. Tests still include c++98. We do not intend to keep everything working with c++98 (we already use c++11 in other headers), but we should not break it either. In this case, you can just enable the new stuff for c++11 or newer standards. https://github.com/

[clang] [CUDA][HIP] Add a __device__ version of std::__glibcxx_assert_fail() (PR #136133)

2025-06-18 Thread Artem Belevich via cfe-commits
Artem-B wrote: @jmmartinez It appears that CUDA tests are broken by this change: https://lab.llvm.org/buildbot/#/builders/69/builds/22562/steps/8/logs/stdio ``` FAILED: External/CUDA/CMakeFiles/algorithm-cuda-11.8-c++98-libstdc++-10.dir/algorithm.cu.o /buildbot/cuda-t4-0/work/clang-cuda-t4/c

[clang] Revert "Add missing intrinsics to cuda headers" (PR #144755)

2025-06-18 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B closed https://github.com/llvm/llvm-project/pull/144755 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-17 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B closed https://github.com/llvm/llvm-project/pull/143664 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] add options `--[no-]offload-inc` (PR #140106)

2025-06-17 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/140106 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA][HIP] add options `--[no-]offload-inc` (PR #140106)

2025-06-17 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/140106 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [NVPTX] Enable OpenCL 3d_image_writes support (PR #143331)

2025-06-09 Thread Artem Belevich via cfe-commits
Artem-B wrote: @svenvh appears to be the current maintainer of OpenCL in LLVM. https://github.com/llvm/llvm-project/pull/143331 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA] Disallow use of address_space(N) on CUDA device variables. (PR #142857)

2025-06-09 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B closed https://github.com/llvm/llvm-project/pull/142857 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-13 Thread Artem Belevich via cfe-commits
@@ -479,7 +479,291 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, return ret; } -#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 +#pragma push_macro("__INTRINSIC_LOAD") +#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __Tmp

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-13 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. LGTM with one last nit. https://github.com/llvm/llvm-project/pull/143664 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CUDA] Disallow use of address_space(N) on CUDA device variables. (PR #142857)

2025-06-04 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B created https://github.com/llvm/llvm-project/pull/142857 The variables have implicit host-side shadow instances and explicit address space attribute breaks them on the host. >From e2e8da0271ae11711dbd54f6e8d9ff498f3226d4 Mon Sep 17 00:00:00 2001 From: Artem Belevich

[clang] [CUDA] Disallow use of address_space(N) on CUDA device variables. (PR #142857)

2025-06-06 Thread Artem Belevich via cfe-commits
Artem-B wrote: @yxsamliu Sam, do you have any thoughts on this? https://github.com/llvm/llvm-project/pull/142857 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-12 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B requested changes to this pull request. Nice. I like this approach better. There are few more things to polish up, but it looks good overall. https://github.com/llvm/llvm-project/pull/143664 ___ cfe-commits mailing list cfe-

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-12 Thread Artem Belevich via cfe-commits
@@ -479,6 +479,275 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, return ret; } +#define INTRINSIC_LOAD(func_name, asm_op, decl_type, internal_type, asm_type) \ Artem-B wrote: Can we merge `INTRINSIC*` and `MINTRINSIC*` mac

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-12 Thread Artem Belevich via cfe-commits
@@ -479,6 +479,275 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, return ret; } +#define INTRINSIC_LOAD(func_name, asm_op, decl_type, internal_type, asm_type) \ Artem-B wrote: We have to be careful with the names used in th

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-12 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/143664 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Add missing intrinsics to cuda headers (PR #143664)

2025-06-11 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/143664 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [NVPTX] Consolidate and cleanup various NVPTXISD nodes (NFC) (PR #145581)

2025-06-25 Thread Artem Belevich via cfe-commits
@@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum, } llvm_unreachable("Invalid cta_group in printCTAGroup"); } + +void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum, +raw_ostream &

[clang] [llvm] [NVPTX] Consolidate and cleanup various NVPTXISD nodes (NFC) (PR #145581)

2025-06-25 Thread Artem Belevich via cfe-commits
@@ -457,3 +457,25 @@ void NVPTXInstPrinter::printCTAGroup(const MCInst *MI, int OpNum, } llvm_unreachable("Invalid cta_group in printCTAGroup"); } + +void NVPTXInstPrinter::printCallOperand(const MCInst *MI, int OpNum, +raw_ostream &

[clang] [Clang] Extract offloading code from static libs with 'offload-arch=' (PR #147823)

2025-07-09 Thread Artem Belevich via cfe-commits
Artem-B wrote: > but we now assume that if the user specified --offload-arch= on the link job, > they definitely want that architecture to be used if it exists. That would be my assumption, too. Do we currently just ignore `--offload-arch=` for the linking phase? With the patch, what's expe

[clang] [HIP] Add warning for -mwavefrontsize64 on gfx10+ architectures (PR #140185)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -67,6 +67,12 @@ // DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64" // DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode" +// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \ +// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \ +// RUN:

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,141 @@ +//===- llvm/Support/Jobserver.h - Jobserver Client --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,141 @@ +//===- llvm/Support/Jobserver.h - Jobserver Client --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,257 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -1420,12 +1420,18 @@ int main(int Argc, char **Argv) { parallel::strategy = hardware_concurrency(1); if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) { -unsigned Threads = 0; -if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0) - reportError(

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
https://github.com/Artem-B commented: Few comments on syntax/style. I didn't look at the job management logic itself. https://github.com/llvm/llvm-project/pull/145131 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bi

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,141 @@ +//===- llvm/Support/Jobserver.h - Jobserver Client --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,257 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

[clang] [llvm] [LLVM] Add GNU make jobserver support (PR #145131)

2025-07-14 Thread Artem Belevich via cfe-commits
@@ -0,0 +1,257 @@ +//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Ap

<    8   9   10   11   12   13   14   >