from:"Fabian Ritter via cfe-commits"

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-09-03 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Ping.

https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-09-10 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,703 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only 
-verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device 
-verify=expected,ondevice %s
+
+
+// Tests to ensure that functions with host and device overloads that are
+// called outside of function bodies and variable initializers (e.g., in
+// template arguments) are resolved with respect to the declaration to which
+// they belong.
+
+// Opaque types used for tests:
+struct DeviceTy {};
+struct HostTy {};
+struct HostDeviceTy {};
+struct TemplateTy {};
+
+struct TrueTy { static const bool value = true; };
+struct FalseTy { static const bool value = false; };
+
+// Select one of two types based on a boolean condition.
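+template <bool Cond, typename T, typename F> struct select_type {};
+template <typename T, typename F> struct select_type<true, T, F> { typedef T type; };
+template <typename T, typename F> struct select_type<false, T, F> { typedef F type; };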
+
+template  struct check : public select_type { };
+
+// Check if two types are the same.
+template<typename T, typename U> struct is_same : public FalseTy { };
+template<typename T> struct is_same<T, T> : public TrueTy { };
+
+// A static assertion that fails at compile time if the expression E does not
+// have type T.
+#define ASSERT_HAS_TYPE(E, T) static_assert(is_same<decltype(E), T>::value);
+
+
+// is_on_device() is true when called in a device context and false if called 
in a host context.
+__attribute__((host)) constexpr bool is_on_device(void) { return false; }
+__attribute__((device)) constexpr bool is_on_device(void) { return true; }
+
+
+// this type depends on whether it occurs in host or device code
+#define targetdep_t select_type<is_on_device(), DeviceTy, HostTy>::type
+
+// Defines and typedefs with different values in host and device compilation.
+#ifdef __CUDA_ARCH__
+#define CurrentTarget DEVICE
+typedef DeviceTy CurrentTargetTy;
+typedef DeviceTy TemplateIfHostTy;
+#else
+#define CurrentTarget HOST
+typedef HostTy CurrentTargetTy;
+typedef TemplateTy TemplateIfHostTy;
+#endif
+
+
+
+// targetdep_t in function declarations should depend on the target of the
+// declared function.
+__attribute__((device)) targetdep_t decl_ret_early_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy)
+
+__attribute__((host)) targetdep_t decl_ret_early_host(void);
+ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy)
+
+__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
+
+// If the function target is specified too late and can therefore not be
+// considered for overload resolution in targetdep_t, warn.
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
+
+// No warning necessary if the ignored attribute doesn't change the result.
+targetdep_t __attribute__((host)) decl_ret_late_host(void);
+ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
+
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
+
+// An odd way of writing this, but it's possible.
+__attribute__((device)) targetdep_t __attribute__((host)) 
decl_ret_early_device_late_host(void); // expected-warning {{target attribute 
has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
+
+
+// The same for function definitions and parameter types:
+__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The parameter is still after the attribute, so it needs no warning.
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has 
been ignored for overload resolution}}
+ret_late_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host, device)) // expected-warning {{target 
attribute has been ignored for overload resolution}}
+ret_late_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning 
{{target attribute has been ignored for overload resolution}}
+ret_early_device_late_host(targetdep_t x)

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-06-14 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Ping @zygoloid, your input would be highly appreciated. Thanks!

https://github.com/llvm/llvm-project/pull/93546
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-05-28 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/93546

Outside of function bodies, the resolution of host/device overloads for 
functions in HIP/CUDA operates as if in a host-device context. This means that 
the device overload is used in the device compilation phase and the host 
overload is used in the host compilation phase.

Therefore, the following code would cause a deprecation warning during host 
compilation, even though val is only used as part of a device function:

```C++
__attribute__((host, deprecated)) constexpr int val(void) {return 1;}
__attribute__((device)) constexpr int val(void) {return 1;}
__attribute__((device)) std::enable_if<(val() > 0), int>::type fun(void) {
return 42;
}
```

As only the available device overload is used during device compilation, where 
code for fun is actually generated, this diagnostic is spurious.

This patch suppresses availability diagnostics in such situations: When an 
unavailable host function is used in a device context during host compilation 
or when an unavailable device function is used in a host context during device 
compilation.

This change is necessary to avoid spurious warnings with #91478, e.g., in the 
rocPRIM library.

>From d88250fc1493ed8ab2780678deb15620b1897620 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 28 May 2024 07:45:52 -0400
Subject: [PATCH] [Clang][HIP] Suppress availability diagnostics for mismatched
 host/device overloads

Outside of function bodies, the resolution of host/device overloads for
functions in HIP/CUDA operates as if in a host-device context. This means that
the device overload is used in the device compilation phase and the host
overload is used in the host compilation phase.

Therefore, the following code would cause a deprecation warning during host
compilation, even though val is only used as part of a device function:

__attribute__((host, deprecated)) constexpr int val(void) {return 1;}
__attribute__((device)) constexpr int val(void) {return 1;}
__attribute__((device)) std::enable_if<(val() > 0), int>::type fun(void) {
return 42;
}

As only the available device overload is used during device compilation, where
code for fun is actually generated, this diagnostic is spurious.

This patch suppresses availability diagnostics in such situations: When an
unavailable host function is used in a device context during host compilation
or when an unavailable device function is used in a host context during device
compilation.
---
 clang/lib/Sema/SemaAvailability.cpp   |  53 +++
 ...lability-warnings-mismatched-attributes.cu | 149 ++
 2 files changed, 202 insertions(+)
 create mode 100644 
clang/test/SemaCUDA/suppress-availability-warnings-mismatched-attributes.cu

diff --git a/clang/lib/Sema/SemaAvailability.cpp 
b/clang/lib/Sema/SemaAvailability.cpp
index 22f5a2f663477..984789489098a 100644
--- a/clang/lib/Sema/SemaAvailability.cpp
+++ b/clang/lib/Sema/SemaAvailability.cpp
@@ -20,6 +20,7 @@
 #include "clang/Sema/DelayedDiagnostic.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/Sema.h"
+#include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaObjC.h"
 #include "llvm/ADT/StringRef.h"
 #include <optional>
@@ -156,6 +157,58 @@ static bool ShouldDiagnoseAvailabilityInContext(
 }
   }
 
+  if (S.getLangOpts().CUDA || S.getLangOpts().HIP) {
+// In CUDA/HIP, do not diagnose uses of unavailable host or device function
+// overloads when they occur in the context of a Decl with an explicitly
+// given opposite target.
+// We encounter this if the OffendingDecl is used outside of a function
+// body, e.g., in template arguments for a function's return or parameter
+// types. In this case, overloads of the called function are resolved as if
+// in a host-device context, i.e., the device overload is chosen in the
+// device compilation phase and the host overload in the host compilation
+// phase. As code is only generated for the variant with matching targets,
+// an availability diagnostic for the variant with non-matching targets would
+// be spurious.
+
+if (auto *OffendingFunDecl = llvm::dyn_cast<FunctionDecl>(OffendingDecl)) {
+  Decl *ActualCtx = Ctx;
+  if (auto *FTD = llvm::dyn_cast<FunctionTemplateDecl>(Ctx)) {
+// Attributes of template Decls are only on the templated Decl
+ActualCtx = FTD->getTemplatedDecl();
+  }
+  if (auto *CtxFun = llvm::dyn_cast<FunctionDecl>(ActualCtx)) {
+auto TargetIs = [&S](const FunctionDecl *FD, CUDAFunctionTarget FT) {
+  return S.CUDA().IdentifyTarget(FD, /* IgnoreImplicitHDAttr */ true) 
==
+ FT;
+};
+
+bool CtxIsHost = TargetIs(CtxFun, CUDAFunctionTarget::Host);
+bool CtxIsDevice = TargetIs(CtxFun, CUDAFunctionTarget::Device);
+
+bool OffendingDeclIsHost =
+TargetIs(OffendingFunDecl, CUDAFunctionTarget::Host);
+bool OffendingDeclIsDevice =
+TargetIs(OffendingFunDecl, CUDAFunct

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-05-28 Thread Fabian Ritter via cfe-commits

ritter-x2a wrote:

> If we are adding a special case for handling overloads, perhaps a better 
> approach would be to consider inferring the caller context from the 
> enveloping function declaration attributes, and allow overload resolution to 
> pick a device function instead. It would avoid the errors you're trying to 
> suppress, and it will arguably make things more consistent -- the function 
> declaration will have the same signature in both host and device compilations.

One problem I encountered when I looked into such a solution is that the 
host/device attribute(s) are not necessarily parsed at the time when the 
overload resolution in template arguments of the return type happens, because 
the `__attribute__((device))` can come after the return type specifier (cf. the 
`DeviceUserOverloadFunHostDepr2` function in the test case).
So far I haven't found an existing mechanism in clang to solve this without a 
bigger change that introduces some sort of backtracking. I'd be happy about 
pointers in that direction.

https://github.com/llvm/llvm-project/pull/93546
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-06-05 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Ping.

https://github.com/llvm/llvm-project/pull/93546
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-27 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,703 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only 
-verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device 
-verify=expected,ondevice %s
+
+
+// Tests to ensure that functions with host and device overloads that are
+// called outside of function bodies and variable initializers (e.g., in
+// template arguments) are resolved with respect to the declaration to which
+// they belong.
+
+// Opaque types used for tests:
+struct DeviceTy {};
+struct HostTy {};
+struct HostDeviceTy {};
+struct TemplateTy {};
+
+struct TrueTy { static const bool value = true; };
+struct FalseTy { static const bool value = false; };
+
+// Select one of two types based on a boolean condition.
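+template <bool Cond, typename T, typename F> struct select_type {};
+template <typename T, typename F> struct select_type<true, T, F> { typedef T type; };
+template <typename T, typename F> struct select_type<false, T, F> { typedef F type; };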
+
+template  struct check : public select_type { };
+
+// Check if two types are the same.
+template<typename T, typename U> struct is_same : public FalseTy { };
+template<typename T> struct is_same<T, T> : public TrueTy { };
+
+// A static assertion that fails at compile time if the expression E does not
+// have type T.
+#define ASSERT_HAS_TYPE(E, T) static_assert(is_same<decltype(E), T>::value);
+
+
+// is_on_device() is true when called in a device context and false if called 
in a host context.
+__attribute__((host)) constexpr bool is_on_device(void) { return false; }
+__attribute__((device)) constexpr bool is_on_device(void) { return true; }
+
+
+// this type depends on whether it occurs in host or device code
+#define targetdep_t select_type<is_on_device(), DeviceTy, HostTy>::type
+
+// Defines and typedefs with different values in host and device compilation.
+#ifdef __CUDA_ARCH__
+#define CurrentTarget DEVICE
+typedef DeviceTy CurrentTargetTy;
+typedef DeviceTy TemplateIfHostTy;
+#else
+#define CurrentTarget HOST
+typedef HostTy CurrentTargetTy;
+typedef TemplateTy TemplateIfHostTy;
+#endif
+
+
+
+// targetdep_t in function declarations should depend on the target of the
+// declared function.
+__attribute__((device)) targetdep_t decl_ret_early_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy)
+
+__attribute__((host)) targetdep_t decl_ret_early_host(void);
+ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy)
+
+__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
+
+// If the function target is specified too late and can therefore not be
+// considered for overload resolution in targetdep_t, warn.
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
+
+// No warning necessary if the ignored attribute doesn't change the result.
+targetdep_t __attribute__((host)) decl_ret_late_host(void);
+ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
+
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
+
+// An odd way of writing this, but it's possible.
+__attribute__((device)) targetdep_t __attribute__((host)) 
decl_ret_early_device_late_host(void); // expected-warning {{target attribute 
has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
+
+
+// The same for function definitions and parameter types:
+__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The parameter is still after the attribute, so it needs no warning.
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has 
been ignored for overload resolution}}
+ret_late_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host, device)) // expected-warning {{target 
attribute has been ignored for overload resolution}}
+ret_late_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning 
{{target attribute has been ignored for overload resolution}}
+ret_early_device_late_host(targetdep_t x)

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code (PR #91478)

2024-05-08 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/91478

The `__AMDGCN_WAVEFRONT_SIZE` and `__AMDGCN_WAVEFRONT_SIZE__` macros in HIP can 
only provide meaningful values during device compilation. They are currently 
usable in host code, but only contain the default value of 64, independent of 
the target device(s).

This patch redefines them during host compilation to issue a deprecation 
warning if the macros are used in host code. Their value during host 
compilation in actual HIP code as well as in preprocessing directives stays 64 
as before. Macro uses in preprocessing directives are not diagnosed. Macro uses 
in device code are not affected.

In a later step, after a deprecation period, we can easily adjust this 
implementation so that macro uses in host code cause hard errors instead of 
warnings.

**Considered Alternatives:**
- Introducing a specialized diagnostic during clang's semantic analysis:
  This is technically possible and allows for cleaner diagnostics, but requires 
HIP-specific special case handling in clang's very general 
`Sema::ActOnNumericConstant(...)` method, since these macros appear as integer 
literals during parsing/semantic analysis where we know if we are in a host 
function. In comparison, this PR introduces less complexity to code that is
  independent from HIP.

- See also the previous rejected proposal, which eliminates the macros for host 
compilation: https://github.com/llvm/llvm-project/pull/83558

**Implementation Rationale:**
- I have placed the macro redefinitions in a new header file so that it is 
included even if the `-nogpuinc`, `-nobuiltininc`, and/or `-nostdinc` CLI flags 
are provided, enabling consistent diagnostics with any combination of these 
flags. I am open to suggestions for better solutions.
- The constexpr function with separate overloads for host and device is a HIP 
feature that allows us to identify macro uses in host code without special-case 
handling in the semantic analysis. The value returned by these overloads is 
irrelevant; they are only referenced to trigger the deprecation warning. 
Constexpr variables cannot be overloaded like this.
- The `AMDGCN_WAVEFRONT_SIZE` macros are commonly used in preprocessing 
directives for conditional includes. The defined expression is carefully 
crafted to not break this use case:
  - Calling the constexpr function, instead of merely referencing it (which 
yields a function pointer), would be diagnosed by the preprocessor as an 
undefined function-like macro when the macro is used in directives.
  - Using the more natural comma operator instead of the ternary conditional 
operator to discard the value of the constexpr function in the expression is 
illegal in constant expressions that may occur in preprocessing directives 
according to the Standard (e.g., the C11 Standard, Section 6.6 "Constant 
expressions", paragraph 3: "Constant expressions shall not contain assignment, 
increment, decrement, function-call, or comma operators, except when they are 
contained within a subexpression that is not evaluated.") Clang diagnoses this 
with -pedantic.
  - In preprocessing directives, the function identifier is considered an 
undefined macro, which is interpreted as 0.


Implements SWDEV-449015.


>From 8743f8ab0ca1a158d8ed32652d52f58d7a319fac Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 7 May 2024 11:39:17 -0400
Subject: [PATCH] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in
 host code

The __AMDGCN_WAVEFRONT_SIZE and __AMDGCN_WAVEFRONT_SIZE__ macros in HIP can
only provide meaningful values during device compilation. They are currently
usable in host code, but only contain the default value of 64, independent of
the target device(s).
This patch redefines them during host compilation to issue a deprecation
warning if the macros are used in host code. Their value during host
compilation in actual HIP code as well as in preprocessing directives stays 64
as before. Macro uses in preprocessing directives are not diagnosed. Macro uses
in device code are not affected.

Implements SWDEV-449015.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp| 11 
 clang/lib/Headers/CMakeLists.txt  |  1 +
 .../Headers/__clang_hip_device_macro_guards.h | 55 +++
 .../hip-wavefront-size-host-diagnostics.hip   | 52 ++
 .../Preprocessor/predefined-arch-macros.c |  1 -
 5 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 clang/lib/Headers/__clang_hip_device_macro_guards.h
 create mode 100644 clang/test/Driver/hip-wavefront-size-host-diagnostics.hip

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 07965b487ea79..587aa19349d89 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -550,6 +550,17 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const 
ArgList &DriverArgs,
 CC1Args.push_back(DriverArgs.MakeArgString(P));
   }
 
+  {
+// This header implements diagnostics

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code (PR #91478)

2024-05-10 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,55 @@
+/*=== __clang_hip_device_macro_guards.h - guards for HIP device macros -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+
+/*
+ * WARNING: This header is intended to be directly -include'd by
+ * the compiler and is not supposed to be included by users.
+ *
+ */
+
+#ifndef __CLANG_HIP_DEVICE_MACRO_GUARDS_H__
+#define __CLANG_HIP_DEVICE_MACRO_GUARDS_H__
+
+#if __HIP__
+#if !defined(__HIP_DEVICE_COMPILE__)
+// The __AMDGCN_WAVEFRONT_SIZE macros cannot hold meaningful values during host
+// compilation as devices are not initialized when the macros are defined and
+// there may indeed be devices with differing wavefront sizes in the same
+// system. This code issues diagnostics when the macros are used in host code.
+
+#undef __AMDGCN_WAVEFRONT_SIZE
+#undef __AMDGCN_WAVEFRONT_SIZE__
+
+// Reference __hip_device_macro_guard in a way that is legal in preprocessor
+// directives and does not affect the value so that appropriate diagnostics are
+// issued. Function calls, casts, or the comma operator would make the macro
+// illegal for use in preprocessor directives.
+#define __AMDGCN_WAVEFRONT_SIZE (!__hip_device_macro_guard ? 64 : 64)
+#define __AMDGCN_WAVEFRONT_SIZE__ (!__hip_device_macro_guard ? 64 : 64)
+
+// This function is referenced by the macro in device functions during host
+// compilation, it SHOULD NOT cause a diagnostic.
+__attribute__((device)) static constexpr int __hip_device_macro_guard(void) {
+  return -1;
+}
+
+// This function is referenced by the macro in host functions during host
+// compilation, it SHOULD cause a diagnostic.
+__attribute__((
+host, deprecated("The __AMDGCN_WAVEFRONT_SIZE macros do not correspond "
+ "to the device(s) when used in host code and may only "
+ "be used in device code."))) static constexpr int

ritter-x2a wrote:

re pre-C++11 HIP: I think we can just drop the `constexpr` from both variants 
of the guard function; since the guard function is only referenced and never 
called, the macros would still work as constant expressions.

re OpenMP: As far as I can see in experiments, the macros are not defined 
during OpenMP's host compilation. This is therefore not an issue for OpenMP.

https://github.com/llvm/llvm-project/pull/91478
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/103031

>From 274aaef1847bbdd837213064113adb1182e5bb59 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 13 Aug 2024 05:27:45 -0400
Subject: [PATCH 1/2] [Clang][HIP] Target-dependent overload resolution in
 declarators and specifiers

So far, the resolution of host/device overloads for functions in HIP/CUDA
operates as if in a host-device context for code outside of function bodies,
e.g., in expressions that are part of template arguments in top-level
declarations. This means that, if separate host and device overloads are
declared, the device overload is used in the device compilation phase and the
host overload is used in the host compilation phase.

This patch changes overload resolution in such cases to prefer overloads that
match the target of the declaration in which they occur. For example:

__device__ constexpr int get_n() { return 64; }
__host__ constexpr int get_n() { return -1; }
__device__ std::enable_if<(get_n() > 32)>::type foo() { }

Before, this code would not compile, because get_n resolved to the host
overload during host compilation, causing an error. With this patch, the call
to get_n in the declaration of the device function foo resolves to the device
overload in host and device compilation.

If attributes that affect the declaration's target occur after a call with
target-dependent overload resolution, a warning is issued. This is realized by
registering the Kinds of relevant attributes in the CUDATargetContext when they
are parsed.

This is an alternative to PR #93546, which is required for PR #91478.
---
 .../clang/Basic/DiagnosticSemaKinds.td|   4 +
 clang/include/clang/Sema/SemaCUDA.h   |  32 +-
 clang/lib/Parse/ParseDecl.cpp |   6 +
 clang/lib/Parse/ParseDeclCXX.cpp  |   6 +
 clang/lib/Parse/Parser.cpp|   8 +
 clang/lib/Sema/SemaCUDA.cpp   | 113 ++-
 clang/lib/Sema/SemaOverload.cpp   |   2 +-
 .../target-overloads-availability-warnings.cu | 148 
 ...target-overloads-in-function-prototypes.cu | 690 ++
 9 files changed, 988 insertions(+), 21 deletions(-)
 create mode 100644 
clang/test/SemaCUDA/target-overloads-availability-warnings.cu
 create mode 100644 
clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..8709f60678b466 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in 
%select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target specifier has been ignored for overload resolution; "
+  "move the target specifier to the beginning of the declaration to use it for 
overload resolution">,
+  InGroup;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic 
initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h 
b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b3..83083ada889a16 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
 bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+  const SmallVectorImpl &AttrKinds);
 
   enum CUDAVariableTarget {
 CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,43 @@ class SemaCUDA : public SemaBase {
 CTCK_Unknown,   /// Unknown context
 CTCK_InitGlobalVar, /// Function called during global variable
 /// initialization
+CTCK_Declaration,   /// Function called in a declaration specifier or
+/// declarator outside of other contexts, usually in
+/// template arguments.
   };
 
   /// Define the current global CUDA host/device context where a function may 
be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
 CUDATargetContextKind Kind = CTCK_Unknown;
-Decl *D = nullptr;
+
+CUDATargetContext() = default;
+
+CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+  CUDAFunctionTarget Target);
+
+CUDAFunctionTarget getTarget();
+
+/// If this

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/103031

>From 274aaef1847bbdd837213064113adb1182e5bb59 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 13 Aug 2024 05:27:45 -0400
Subject: [PATCH 1/3] [Clang][HIP] Target-dependent overload resolution in
 declarators and specifiers

So far, the resolution of host/device overloads for functions in HIP/CUDA
operates as if in a host-device context for code outside of function bodies,
e.g., in expressions that are part of template arguments in top-level
declarations. This means that, if separate host and device overloads are
declared, the device overload is used in the device compilation phase and the
host overload is used in the host compilation phase.

This patch changes overload resolution in such cases to prefer overloads that
match the target of the declaration in which they occur. For example:

__device__ constexpr int get_n() { return 64; }
__host__ constexpr int get_n() { return -1; }
__device__ std::enable_if<(get_n() > 32)>::type foo() { }

Before, this code would not compile, because get_n resolved to the host
overload during host compilation, causing an error. With this patch, the call
to get_n in the declaration of the device function foo resolves to the device
overload in host and device compilation.

If attributes that affect the declaration's target occur after a call with
target-dependent overload resolution, a warning is issued. This is realized by
registering the Kinds of relevant attributes in the CUDATargetContext when they
are parsed.

This is an alternative to PR #93546, which is required for PR #91478.
---
 .../clang/Basic/DiagnosticSemaKinds.td|   4 +
 clang/include/clang/Sema/SemaCUDA.h   |  32 +-
 clang/lib/Parse/ParseDecl.cpp |   6 +
 clang/lib/Parse/ParseDeclCXX.cpp  |   6 +
 clang/lib/Parse/Parser.cpp|   8 +
 clang/lib/Sema/SemaCUDA.cpp   | 113 ++-
 clang/lib/Sema/SemaOverload.cpp   |   2 +-
 .../target-overloads-availability-warnings.cu | 148 
 ...target-overloads-in-function-prototypes.cu | 690 ++
 9 files changed, 988 insertions(+), 21 deletions(-)
 create mode 100644 
clang/test/SemaCUDA/target-overloads-availability-warnings.cu
 create mode 100644 
clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..8709f60678b466 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in 
%select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target specifier has been ignored for overload resolution; "
+  "move the target specifier to the beginning of the declaration to use it for 
overload resolution">,
+  InGroup;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic 
initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h 
b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b3..83083ada889a16 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
 bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+  const SmallVectorImpl &AttrKinds);
 
   enum CUDAVariableTarget {
 CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,43 @@ class SemaCUDA : public SemaBase {
 CTCK_Unknown,   /// Unknown context
 CTCK_InitGlobalVar, /// Function called during global variable
 /// initialization
+CTCK_Declaration,   /// Function called in a declaration specifier or
+/// declarator outside of other contexts, usually in
+/// template arguments.
   };
 
   /// Define the current global CUDA host/device context where a function may 
be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
 CUDATargetContextKind Kind = CTCK_Unknown;
-Decl *D = nullptr;
+
+CUDATargetContext() = default;
+
+CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+  CUDAFunctionTarget Target);
+
+CUDAFunctionTarget getTarget();
+
+/// If this

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/103031

>From 274aaef1847bbdd837213064113adb1182e5bb59 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 13 Aug 2024 05:27:45 -0400
Subject: [PATCH 1/4] [Clang][HIP] Target-dependent overload resolution in
 declarators and specifiers

So far, the resolution of host/device overloads for functions in HIP/CUDA
operates as if in a host-device context for code outside of function bodies,
e.g., in expressions that are part of template arguments in top-level
declarations. This means that, if separate host and device overloads are
declared, the device overload is used in the device compilation phase and the
host overload is used in the host compilation phase.

This patch changes overload resolution in such cases to prefer overloads that
match the target of the declaration in which they occur. For example:

__device__ constexpr int get_n() { return 64; }
__host__ constexpr int get_n() { return -1; }
__device__ std::enable_if<(get_n() > 32)>::type foo() { }

Before, this code would not compile: get_n resolved to the host overload
during host compilation, so the condition (-1 > 32) was false and enable_if
had no member "type". With this patch, the call to get_n in the declaration of
the device function foo resolves to the device overload in both host and
device compilation.

If attributes that affect the declaration's target occur after a call with
target-dependent overload resolution, a warning is issued. This is realized by
registering the Kinds of relevant attributes in the CUDATargetContext when they
are parsed.

This is an alternative to PR #93546, which is required for PR #91478.
---
 .../clang/Basic/DiagnosticSemaKinds.td|   4 +
 clang/include/clang/Sema/SemaCUDA.h   |  32 +-
 clang/lib/Parse/ParseDecl.cpp |   6 +
 clang/lib/Parse/ParseDeclCXX.cpp  |   6 +
 clang/lib/Parse/Parser.cpp|   8 +
 clang/lib/Sema/SemaCUDA.cpp   | 113 ++-
 clang/lib/Sema/SemaOverload.cpp   |   2 +-
 .../target-overloads-availability-warnings.cu | 148 
 ...target-overloads-in-function-prototypes.cu | 690 ++
 9 files changed, 988 insertions(+), 21 deletions(-)
 create mode 100644 
clang/test/SemaCUDA/target-overloads-availability-warnings.cu
 create mode 100644 
clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d8..8709f60678b466 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in 
%select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target specifier has been ignored for overload resolution; "
+  "move the target specifier to the beginning of the declaration to use it for 
overload resolution">,
+  InGroup;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic 
initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h 
b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b3..83083ada889a16 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
 bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+  const SmallVectorImpl &AttrKinds);
 
   enum CUDAVariableTarget {
 CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,43 @@ class SemaCUDA : public SemaBase {
 CTCK_Unknown,   /// Unknown context
 CTCK_InitGlobalVar, /// Function called during global variable
 /// initialization
+CTCK_Declaration,   /// Function called in a declaration specifier or
+/// declarator outside of other contexts, usually in
+/// template arguments.
   };
 
   /// Define the current global CUDA host/device context where a function may 
be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
 CUDATargetContextKind Kind = CTCK_Unknown;
-Decl *D = nullptr;
+
+CUDATargetContext() = default;
+
+CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+  CUDAFunctionTarget Target);
+
+CUDAFunctionTarget getTarget();
+
+/// If this

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits



@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in 
%select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target specifier has been ignored for overload resolution; "

ritter-x2a wrote:

Addressed in e273a996854cb3256cfeed832c3f16ff6fd0f534, thank you for the 
feedback!

https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits



@@ -115,20 +143,65 @@ static bool hasAttr(const Decl *D, bool 
IgnoreImplicitAttr) {
  });
 }
 
+SemaCUDA::CUDATargetContext::CUDATargetContext(SemaCUDA *S,

ritter-x2a wrote:

Addressed in e273a996854cb3256cfeed832c3f16ff6fd0f534.

https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-19 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,703 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only 
-verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device 
-verify=expected,ondevice %s
+
+
+// Tests to ensure that functions with host and device overloads that are
+// called outside of function bodies and variable initializers, e.g., in
+// template arguments are resolved with respect to the declaration to which 
they
+// belong.
+
+// Opaque types used for tests:
+struct DeviceTy {};
+struct HostTy {};
+struct HostDeviceTy {};
+struct TemplateTy {};
+
+struct TrueTy { static const bool value = true; };
+struct FalseTy { static const bool value = false; };
+
+// Select one of two types based on a boolean condition.
+template  struct select_type {};
+template  struct select_type { typedef T 
type; };
+template  struct select_type { typedef F 
type; };
+
+template  struct check : public select_type { };
+
+// Check if two types are the same.
+template struct is_same : public FalseTy { };
+template struct is_same : public TrueTy { };
+
+// A static assertion that fails at compile time if the expression E does not
+// have type T.
+#define ASSERT_HAS_TYPE(E, T) static_assert(is_same::value);
+
+
+// is_on_device() is true when called in a device context and false if called 
in a host context.
+__attribute__((host)) constexpr bool is_on_device(void) { return false; }
+__attribute__((device)) constexpr bool is_on_device(void) { return true; }
+
+
+// this type depends on whether it occurs in host or device code
+#define targetdep_t select_type::type
+
+// Defines and typedefs with different values in host and device compilation.
+#ifdef __CUDA_ARCH__
+#define CurrentTarget DEVICE
+typedef DeviceTy CurrentTargetTy;
+typedef DeviceTy TemplateIfHostTy;
+#else
+#define CurrentTarget HOST
+typedef HostTy CurrentTargetTy;
+typedef TemplateTy TemplateIfHostTy;
+#endif
+
+
+
+// targetdep_t in function declarations should depend on the target of the
+// declared function.
+__attribute__((device)) targetdep_t decl_ret_early_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy)
+
+__attribute__((host)) targetdep_t decl_ret_early_host(void);
+ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy)
+
+__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
+
+// If the function target is specified too late and can therefore not be
+// considered for overload resolution in targetdep_t, warn.
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
+
+// No warning necessary if the ignored attribute doesn't change the result.
+targetdep_t __attribute__((host)) decl_ret_late_host(void);
+ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
+
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
+
+// An odd way of writing this, but it's possible.
+__attribute__((device)) targetdep_t __attribute__((host)) 
decl_ret_early_device_late_host(void); // expected-warning {{target attribute 
has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
+
+
+// The same for function definitions and parameter types:
+__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The parameter is still after the attribute, so it needs no warning.
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has 
been ignored for overload resolution}}
+ret_late_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host, device)) // expected-warning {{target 
attribute has been ignored for overload resolution}}
+ret_late_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning 
{{target attribute has been ignored for overload resolution}}
+ret_early_device_late_host(targetdep_t x)

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-08-26 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Ping.

https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code (PR #91478)

2024-10-07 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/91478

>From 4145231fa662dc9324443e98209c8a7c61ed2ed9 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 7 May 2024 11:39:17 -0400
Subject: [PATCH] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in
 host code

The __AMDGCN_WAVEFRONT_SIZE and __AMDGCN_WAVEFRONT_SIZE__ macros in HIP can
only provide meaningful values during device compilation. They are currently
usable in host code, but only contain the default value of 64, independent of
the target device(s).
This patch redefines them during host compilation to issue a deprecation
warning if the macros are used in host code. Their value during host
compilation in actual HIP code as well as in preprocessing directives stays 64
as before. Macro uses in preprocessing directives are not diagnosed. Macro uses
in device code are not affected.

Implements SWDEV-449015.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp| 11 
 clang/lib/Headers/CMakeLists.txt  |  1 +
 .../Headers/__clang_hip_device_macro_guards.h | 55 +++
 .../hip-wavefront-size-host-diagnostics.hip   | 52 ++
 .../Preprocessor/predefined-arch-macros.c |  1 -
 5 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 clang/lib/Headers/__clang_hip_device_macro_guards.h
 create mode 100644 clang/test/Driver/hip-wavefront-size-host-diagnostics.hip

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 2c85d21ebd738c..128fc2bdea0545 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -551,6 +551,17 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const 
ArgList &DriverArgs,
 CC1Args.push_back(DriverArgs.MakeArgString(P));
   }
 
+  {
+// This header implements diagnostics for problematic uses of
+// device-specific macros. Since these diagnostics should be issued even
+// when GPU headers are not included, this header is included separately.
+SmallString<128> P(D.ResourceDir);
+llvm::sys::path::append(P, "include");
+CC1Args.push_back("-internal-isystem");
+CC1Args.push_back(DriverArgs.MakeArgString(P));
+CC1Args.append({"-include", "__clang_hip_device_macro_guards.h"});
+  }
+
   const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
 StringRef Inc = getIncludePath();
 auto &FS = D.getVFS();
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..582c8bd6c8a2da 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -79,6 +79,7 @@ set(hip_files
   __clang_hip_math.h
   __clang_hip_stdlib.h
   __clang_hip_runtime_wrapper.h
+  __clang_hip_device_macro_guards.h
   )
 
 set(hlsl_h
diff --git a/clang/lib/Headers/__clang_hip_device_macro_guards.h 
b/clang/lib/Headers/__clang_hip_device_macro_guards.h
new file mode 100644
index 00..42782c9bb08a7e
--- /dev/null
+++ b/clang/lib/Headers/__clang_hip_device_macro_guards.h
@@ -0,0 +1,55 @@
+/*=== __clang_hip_device_macro_guards.h - guards for HIP device macros -===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===---===
+ */
+
+/*
+ * WARNING: This header is intended to be directly -include'd by
+ * the compiler and is not supposed to be included by users.
+ *
+ */
+
+#ifndef __CLANG_HIP_DEVICE_MACRO_GUARDS_H__
+#define __CLANG_HIP_DEVICE_MACRO_GUARDS_H__
+
+#if __HIP__
+#if !defined(__HIP_DEVICE_COMPILE__)
+// The __AMDGCN_WAVEFRONT_SIZE macros cannot hold meaningful values during host
+// compilation as devices are not initialized when the macros are defined and
+// there may indeed be devices with differing wavefront sizes in the same
+// system. This code issues diagnostics when the macros are used in host code.
+
+#undef __AMDGCN_WAVEFRONT_SIZE
+#undef __AMDGCN_WAVEFRONT_SIZE__
+
+// Reference __hip_device_macro_guard in a way that is legal in preprocessor
+// directives and does not affect the value so that appropriate diagnostics are
+// issued. Function calls, casts, or the comma operator would make the macro
+// illegal for use in preprocessor directives.
+#define __AMDGCN_WAVEFRONT_SIZE (!__hip_device_macro_guard ? 64 : 64)
+#define __AMDGCN_WAVEFRONT_SIZE__ (!__hip_device_macro_guard ? 64 : 64)
+
+// This function is referenced by the macro in device functions during host
+// compilation, it SHOULD NOT cause a diagnostic.
+__attribute__((device)) static constexpr int __hip_device_macro_guard(void) {
+  return -1;
+}
+
+// This function is referenced by the macro in host functions during host
+// compilation, it SHOULD cause a diagnostic.
+__attribute__((
+host, deprecated("The __AMDGCN_WAVEFRONT_SIZE macros do not correspon

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-09-30 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-09-30 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-09-30 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Ping.

https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-10-02 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

@Artem-B  thank you for the suggestion!
Do I understand correctly that it boils down to the following?
  - If a (new) clang CLI option is set: enable PR #91478 - which, among other 
things, warns during host compilation whenever `__AMDGCN_WAVEFRONT_SIZE__` is 
used in template default arguments or template arguments outside of function 
bodies and global initializers,
  - provide `__AMDGCN_WAVEFRONT_SIZE_IS_64_ON_THE_HOST__` as an unchecked and 
more clearly named alternative, and
  - probably change the wording of the deprecation warning to also point to 
this new macro as an alternative to silence the warning.

We would then recommend that users use 
`__AMDGCN_WAVEFRONT_SIZE_IS_64_ON_THE_HOST__` instead of 
`__AMDGCN_WAVEFRONT_SIZE__` outside of function bodies and global initializers 
(and to make sure that that is the behavior they want).

My main concern with this is that I'm not sure if users are likely to manually 
turn on these extra checks, so that the less strict but always-on diagnostics 
produced by this PR might have a bigger impact in practice, at least until we 
change the default.
Also: With the host/device target selection outside of function bodies during 
the different compilation modes as it is, users would need quite deep knowledge 
about compiler internals to make sure that 
`__AMDGCN_WAVEFRONT_SIZE_IS_64_ON_THE_HOST__` is what they want (but, then, 
they also need that when using `__AMDGCN_WAVEFRONT_SIZE__` in these places 
right now, so it might be okay to require that).

https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-21 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/112849

>From acbab11de650830ff94905cabb90f6a680552052 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 18 Oct 2024 03:35:13 -0400
Subject: [PATCH 1/2] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros

So far, these macros can be used in contexts where no meaningful wavefront size
is available. We therefore deprecate these macros, to replace them with a more
resilient interface to access wavefront size information where it is available.

For SWDEV-491529.
---
 clang/docs/AMDGPUSupport.rst  |   4 +-
 clang/docs/HIPSupport.rst |   2 +-
 clang/include/clang/Basic/MacroBuilder.h  |   9 +-
 clang/lib/Basic/Targets/AMDGPU.cpp|   9 +-
 ...wavefront-size-deprecation-diagnostics.hip | 111 ++
 5 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip

diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst
index e63c0e1ba7d67b..3eada5f900613a 100644
--- a/clang/docs/AMDGPUSupport.rst
+++ b/clang/docs/AMDGPUSupport.rst
@@ -50,9 +50,9 @@ Predefined Macros
* - ``__AMDGCN_UNSAFE_FP_ATOMICS__``
  - Defined if unsafe floating-point atomics are allowed.
* - ``__AMDGCN_WAVEFRONT_SIZE__``
- - Defines the wavefront size. Allowed values are 32 and 64.
+ - Defines the wavefront size. Allowed values are 32 and 64 (deprecated).
* - ``__AMDGCN_WAVEFRONT_SIZE``
- - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated.
+ - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated).
* - ``__HAS_FMAF__``
  - Defined if FMAF instruction is available (deprecated).
* - ``__HAS_LDEXPF__``
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index e26297c7af97ac..e830acd8dd85c0 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -178,7 +178,7 @@ Predefined Macros
 
 Note that some architecture specific AMDGPU macros will have default values 
when
 used from the HIP host compilation. Other :doc:`AMDGPU macros <AMDGPUSupport>`
-like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example.
+like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example.
 
 Compilation Modes
 =
diff --git a/clang/include/clang/Basic/MacroBuilder.h 
b/clang/include/clang/Basic/MacroBuilder.h
index 96e67cbbfa3f21..c8236cb40a1cf2 100644
--- a/clang/include/clang/Basic/MacroBuilder.h
+++ b/clang/include/clang/Basic/MacroBuilder.h
@@ -17,6 +17,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
+#include 
 
 namespace clang {
 
@@ -26,8 +27,14 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  void defineMacro(const Twine &Name, const Twine &Value = "1") {
+  /// If DeprecationMsg is provided, also append a pragma to deprecate the
+  /// defined macro.
+  void defineMacro(const Twine &Name, const Twine &Value = "1",
+   std::optional DeprecationMsg = std::nullopt) {
 Out << "#define " << Name << ' ' << Value << '\n';
+if (DeprecationMsg.has_value())
+  Out << "#pragma clang deprecated(" << Name << ", \""
+  << DeprecationMsg.value() << "\")\n";
   }
 
   /// Append a \#undef line for Name.  Name should be of the form XXX
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 3b748d0249d57b..8bb4cf5c597dd7 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -337,9 +337,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (hasFastFMA())
 Builder.defineMacro("FP_FAST_FMA");
 
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
-  // ToDo: deprecate this macro for naming consistency.
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 }
 
diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip 
b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
new file mode 100644
index 00..aca591536a76c0
--- /dev/null
+++ b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-o

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-21 Thread Fabian Ritter via cfe-commits



@@ -26,8 +27,14 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  void defineMacro(const Twine &Name, const Twine &Value = "1") {
+  /// If DeprecationMsg is provided, also append a pragma to deprecate the
+  /// defined macro.
+  void defineMacro(const Twine &Name, const Twine &Value = "1",
+   std::optional DeprecationMsg = std::nullopt) {

ritter-x2a wrote:

Changed in a798615ab4608f1681b15a526bc898df3c12bfa3.

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-21 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

> > Just a heads up, this change is gonna break some OpenMP tests, specifically 
> > those prefixed with `ompx`.
> 
> Just to clarify, adding the deprecation warning will break them, or the 
> eventual, as-of-yet not-scheduled, removal, will?

On my system, the deprecation warning did not break any lit tests (nor tests 
from our internal testing). The `ompx` tests use the macro and will break once 
we actually remove it.

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-10-16 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-10-16 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Closing this PR in favor of a more comprehensive treatment of the 
AMDGCN_WAVEFRONT_SIZE situation.

https://github.com/llvm/llvm-project/pull/103031
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code (PR #91478)

2024-10-16 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/91478
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code (PR #91478)

2024-10-16 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Closing this PR in favor of a more comprehensive treatment of the 
AMDGCN_WAVEFRONT_SIZE situation.

https://github.com/llvm/llvm-project/pull/91478
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-10-16 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Closing this PR in favor of a more comprehensive treatment of the 
AMDGCN_WAVEFRONT_SIZE situation.

https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-10-16 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/109663
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-10-16 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/93546
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Suppress availability diagnostics for mismatched host/device overloads (PR #93546)

2024-10-16 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Closing this PR in favor of a more comprehensive treatment of the 
AMDGCN_WAVEFRONT_SIZE situation.

https://github.com/llvm/llvm-project/pull/93546
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-18 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/112849

So far, these macros can be used in contexts where no meaningful wavefront size 
is available. We therefore deprecate these macros, to replace them with a more 
resilient interface to access wavefront size information where it is available.

For SWDEV-491529.

>From acbab11de650830ff94905cabb90f6a680552052 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 18 Oct 2024 03:35:13 -0400
Subject: [PATCH] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros

So far, these macros can be used in contexts where no meaningful wavefront size
is available. We therefore deprecate these macros, to replace them with a more
resilient interface to access wavefront size information where it is available.

For SWDEV-491529.
---
 clang/docs/AMDGPUSupport.rst  |   4 +-
 clang/docs/HIPSupport.rst |   2 +-
 clang/include/clang/Basic/MacroBuilder.h  |   9 +-
 clang/lib/Basic/Targets/AMDGPU.cpp|   9 +-
 ...wavefront-size-deprecation-diagnostics.hip | 111 ++
 5 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip

diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst
index e63c0e1ba7d67b..3eada5f900613a 100644
--- a/clang/docs/AMDGPUSupport.rst
+++ b/clang/docs/AMDGPUSupport.rst
@@ -50,9 +50,9 @@ Predefined Macros
* - ``__AMDGCN_UNSAFE_FP_ATOMICS__``
  - Defined if unsafe floating-point atomics are allowed.
* - ``__AMDGCN_WAVEFRONT_SIZE__``
- - Defines the wavefront size. Allowed values are 32 and 64.
+ - Defines the wavefront size. Allowed values are 32 and 64 (deprecated).
* - ``__AMDGCN_WAVEFRONT_SIZE``
- - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated.
+ - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated).
* - ``__HAS_FMAF__``
  - Defined if FMAF instruction is available (deprecated).
* - ``__HAS_LDEXPF__``
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index e26297c7af97ac..e830acd8dd85c0 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -178,7 +178,7 @@ Predefined Macros
 
 Note that some architecture specific AMDGPU macros will have default values 
when
 used from the HIP host compilation. Other :doc:`AMDGPU macros <AMDGPUSupport>`
-like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example.
+like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example.
 
 Compilation Modes
 =
diff --git a/clang/include/clang/Basic/MacroBuilder.h 
b/clang/include/clang/Basic/MacroBuilder.h
index 96e67cbbfa3f21..c8236cb40a1cf2 100644
--- a/clang/include/clang/Basic/MacroBuilder.h
+++ b/clang/include/clang/Basic/MacroBuilder.h
@@ -17,6 +17,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
+#include <optional>
 
 namespace clang {
 
@@ -26,8 +27,14 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  void defineMacro(const Twine &Name, const Twine &Value = "1") {
+  /// If DeprecationMsg is provided, also append a pragma to deprecate the
+  /// defined macro.
+  void defineMacro(const Twine &Name, const Twine &Value = "1",
+   std::optional DeprecationMsg = std::nullopt) {
 Out << "#define " << Name << ' ' << Value << '\n';
+if (DeprecationMsg.has_value())
+  Out << "#pragma clang deprecated(" << Name << ", \""
+  << DeprecationMsg.value() << "\")\n";
   }
 
   /// Append a \#undef line for Name.  Name should be of the form XXX
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 3b748d0249d57b..8bb4cf5c597dd7 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -337,9 +337,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (hasFastFMA())
 Builder.defineMacro("FP_FAST_FMA");
 
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
-  // ToDo: deprecate this macro for naming consistency.
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 }
 
diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip 
b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
new file mode 100644
index 00..aca591536a76c0
--- /

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-29 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

Should we move on with the deprecation of the macros with this PR then? Please 
let me know if there are technical concerns remaining with the PR or approve it 
so that it can land in trunk.

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-29 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/112849

>From adcdba420a8ac96fa0b993b820aaa8b42b7d9632 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 18 Oct 2024 03:35:13 -0400
Subject: [PATCH 1/2] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros

So far, these macros can be used in contexts where no meaningful wavefront size
is available. We therefore deprecate these macros, to replace them with a more
resilient interface to access wavefront size information where it is available.

For SWDEV-491529.
---
 clang/docs/AMDGPUSupport.rst  |   4 +-
 clang/docs/HIPSupport.rst |   2 +-
 clang/include/clang/Basic/MacroBuilder.h  |   9 +-
 clang/lib/Basic/Targets/AMDGPU.cpp|   9 +-
 ...wavefront-size-deprecation-diagnostics.hip | 111 ++
 5 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip

diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst
index e63c0e1ba7d67b..3eada5f900613a 100644
--- a/clang/docs/AMDGPUSupport.rst
+++ b/clang/docs/AMDGPUSupport.rst
@@ -50,9 +50,9 @@ Predefined Macros
* - ``__AMDGCN_UNSAFE_FP_ATOMICS__``
  - Defined if unsafe floating-point atomics are allowed.
* - ``__AMDGCN_WAVEFRONT_SIZE__``
- - Defines the wavefront size. Allowed values are 32 and 64.
+ - Defines the wavefront size. Allowed values are 32 and 64 (deprecated).
* - ``__AMDGCN_WAVEFRONT_SIZE``
- - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated.
+ - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated).
* - ``__HAS_FMAF__``
  - Defined if FMAF instruction is available (deprecated).
* - ``__HAS_LDEXPF__``
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index e26297c7af97ac..e830acd8dd85c0 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -178,7 +178,7 @@ Predefined Macros
 
 Note that some architecture specific AMDGPU macros will have default values 
when
 used from the HIP host compilation. Other :doc:`AMDGPU macros <AMDGPUSupport>`
-like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example.
+like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example.
 
 Compilation Modes
 =
diff --git a/clang/include/clang/Basic/MacroBuilder.h 
b/clang/include/clang/Basic/MacroBuilder.h
index 96e67cbbfa3f21..c8236cb40a1cf2 100644
--- a/clang/include/clang/Basic/MacroBuilder.h
+++ b/clang/include/clang/Basic/MacroBuilder.h
@@ -17,6 +17,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
+#include <optional>
 
 namespace clang {
 
@@ -26,8 +27,14 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  void defineMacro(const Twine &Name, const Twine &Value = "1") {
+  /// If DeprecationMsg is provided, also append a pragma to deprecate the
+  /// defined macro.
+  void defineMacro(const Twine &Name, const Twine &Value = "1",
+   std::optional DeprecationMsg = std::nullopt) {
 Out << "#define " << Name << ' ' << Value << '\n';
+if (DeprecationMsg.has_value())
+  Out << "#pragma clang deprecated(" << Name << ", \""
+  << DeprecationMsg.value() << "\")\n";
   }
 
   /// Append a \#undef line for Name.  Name should be of the form XXX
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 078819183afdac..99f8f2944e2796 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -337,9 +337,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (hasFastFMA())
 Builder.defineMacro("FP_FAST_FMA");
 
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
-  // ToDo: deprecate this macro for naming consistency.
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 }
 
diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip 
b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
new file mode 100644
index 00..aca591536a76c0
--- /dev/null
+++ b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-o

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-10-29 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

> > > Should we move on with the deprecation of the macros with this PR then? 
> > > Please let me know if there are technical concerns remaining with the PR 
> > > or approve it so that it can land in trunk.
> > 
> > 
> > CI is red but it seems to be some weird Windows failures. Ideally if the 
> > builtin / intrinsic is the way to go I'd like that to be folded much 
> > earlier because it prevents loop unrolling for cases like SIMT scan / 
> > reduce.
> 
> Let me rebase so that CI runs again; Windows CI was a bit of a mess in the 
> last week or so.

CI looks better now.

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Reapply: Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #115507)

2024-11-08 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/115507

So far, these macros can be used in contexts where no meaningful
wavefront size is available. We therefore deprecate these macros, to
replace them with a more resilient interface to access wavefront size
information where it is available.

Reapplies #112849 with a fix for the non-hermetic clang test that failed
on Mac after the revert in #115499.

For SWDEV-491529.

>From eb240e351e496e4e22fa46d005021b020f62479a Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 8 Nov 2024 10:56:39 -0500
Subject: [PATCH 1/2] [Clang][HIP] Reapply: Deprecate the AMDGCN_WAVEFRONT_SIZE
 macros

So far, these macros can be used in contexts where no meaningful
wavefront size is available. We therefore deprecate these macros, to
replace them with a more resilient interface to access wavefront size
information where it is available.

Reapplies #112849 with a fix for the non-hermetic clang test that failed
on Mac after the revert in #115499.

For SWDEV-491529.
---
 clang/docs/AMDGPUSupport.rst  |   4 +-
 clang/docs/HIPSupport.rst |   2 +-
 clang/include/clang/Basic/MacroBuilder.h  |   8 +-
 clang/lib/Basic/Targets/AMDGPU.cpp|   9 +-
 ...wavefront-size-deprecation-diagnostics.hip | 111 ++
 5 files changed, 127 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip

diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst
index e63c0e1ba7d67b..3eada5f900613a 100644
--- a/clang/docs/AMDGPUSupport.rst
+++ b/clang/docs/AMDGPUSupport.rst
@@ -50,9 +50,9 @@ Predefined Macros
* - ``__AMDGCN_UNSAFE_FP_ATOMICS__``
  - Defined if unsafe floating-point atomics are allowed.
* - ``__AMDGCN_WAVEFRONT_SIZE__``
- - Defines the wavefront size. Allowed values are 32 and 64.
+ - Defines the wavefront size. Allowed values are 32 and 64 (deprecated).
* - ``__AMDGCN_WAVEFRONT_SIZE``
- - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated.
+ - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated).
* - ``__HAS_FMAF__``
  - Defined if FMAF instruction is available (deprecated).
* - ``__HAS_LDEXPF__``
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index e26297c7af97ac..e830acd8dd85c0 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -178,7 +178,7 @@ Predefined Macros
 
 Note that some architecture specific AMDGPU macros will have default values 
when
 used from the HIP host compilation. Other :doc:`AMDGPU macros <AMDGPUSupport>`
-like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example.
+like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example.
 
 Compilation Modes
 =
diff --git a/clang/include/clang/Basic/MacroBuilder.h 
b/clang/include/clang/Basic/MacroBuilder.h
index 96e67cbbfa3f21..d83f27c236e3d8 100644
--- a/clang/include/clang/Basic/MacroBuilder.h
+++ b/clang/include/clang/Basic/MacroBuilder.h
@@ -26,8 +26,14 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  void defineMacro(const Twine &Name, const Twine &Value = "1") {
+  /// If DeprecationMsg is provided, also append a pragma to deprecate the
+  /// defined macro.
+  void defineMacro(const Twine &Name, const Twine &Value = "1",
+   Twine DeprecationMsg = "") {
 Out << "#define " << Name << ' ' << Value << '\n';
+if (!DeprecationMsg.isTriviallyEmpty())
+  Out << "#pragma clang deprecated(" << Name << ", \"" << DeprecationMsg
+  << "\")\n";
   }
 
   /// Append a \#undef line for Name.  Name should be of the form XXX
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 078819183afdac..99f8f2944e2796 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -337,9 +337,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (hasFastFMA())
 Builder.defineMacro("FP_FAST_FMA");
 
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
-  // ToDo: deprecate this macro for naming consistency.
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
+  "compile-time-constant access to the wavefront size will 
"
+  "be removed in a future release");
   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 }
 
diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip 
b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
n

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-11-08 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,111 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s
+
+// Test that deprecation warnings for the wavefront size macro are emitted 
properly.
+
+#include 

ritter-x2a wrote:

@nico Thanks for the report and sorry for the inconvenience! I reverted the 
patch with #115499 and opened PR #115507 with a fix. I'd appreciate it if you 
could take a look and see if that fixes the issue.

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Reapply: Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #115507)

2024-11-10 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/115507
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Revert "[Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros" (PR #115499)

2024-11-08 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/115499

Reverts llvm/llvm-project#112849 due to test failure on Mac, reported by @nico 

>From 4e1351cf2eb08cc4d8ba1ee5538d3a8e7cfa5aa8 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 8 Nov 2024 16:19:59 +0100
Subject: [PATCH] Revert "[Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE
 macros (#112849)"

This reverts commit e5c6d1f4e6d6c8709f92b47717cffc486947ff1b.
---
 clang/docs/AMDGPUSupport.rst  |   4 +-
 clang/docs/HIPSupport.rst |   2 +-
 clang/include/clang/Basic/MacroBuilder.h  |   8 +-
 clang/lib/Basic/Targets/AMDGPU.cpp|   9 +-
 ...wavefront-size-deprecation-diagnostics.hip | 111 --
 5 files changed, 7 insertions(+), 127 deletions(-)
 delete mode 100644 
clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip

diff --git a/clang/docs/AMDGPUSupport.rst b/clang/docs/AMDGPUSupport.rst
index 3eada5f900613a..e63c0e1ba7d67b 100644
--- a/clang/docs/AMDGPUSupport.rst
+++ b/clang/docs/AMDGPUSupport.rst
@@ -50,9 +50,9 @@ Predefined Macros
* - ``__AMDGCN_UNSAFE_FP_ATOMICS__``
  - Defined if unsafe floating-point atomics are allowed.
* - ``__AMDGCN_WAVEFRONT_SIZE__``
- - Defines the wavefront size. Allowed values are 32 and 64 (deprecated).
+ - Defines the wavefront size. Allowed values are 32 and 64.
* - ``__AMDGCN_WAVEFRONT_SIZE``
- - Alias to ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated).
+ - Alias to ``__AMDGCN_WAVEFRONT_SIZE__``. To be deprecated.
* - ``__HAS_FMAF__``
  - Defined if FMAF instruction is available (deprecated).
* - ``__HAS_LDEXPF__``
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index e830acd8dd85c0..e26297c7af97ac 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -178,7 +178,7 @@ Predefined Macros
 
 Note that some architecture specific AMDGPU macros will have default values 
when
 used from the HIP host compilation. Other :doc:`AMDGPU macros <AMDGPUSupport>`
-like ``__AMDGCN_WAVEFRONT_SIZE__`` (deprecated) will default to 64 for example.
+like ``__AMDGCN_WAVEFRONT_SIZE__`` will default to 64 for example.
 
 Compilation Modes
 =
diff --git a/clang/include/clang/Basic/MacroBuilder.h 
b/clang/include/clang/Basic/MacroBuilder.h
index d83f27c236e3d8..96e67cbbfa3f21 100644
--- a/clang/include/clang/Basic/MacroBuilder.h
+++ b/clang/include/clang/Basic/MacroBuilder.h
@@ -26,14 +26,8 @@ class MacroBuilder {
   MacroBuilder(raw_ostream &Output) : Out(Output) {}
 
   /// Append a \#define line for macro of the form "\#define Name Value\n".
-  /// If DeprecationMsg is provided, also append a pragma to deprecate the
-  /// defined macro.
-  void defineMacro(const Twine &Name, const Twine &Value = "1",
-   Twine DeprecationMsg = "") {
+  void defineMacro(const Twine &Name, const Twine &Value = "1") {
 Out << "#define " << Name << ' ' << Value << '\n';
-if (!DeprecationMsg.isTriviallyEmpty())
-  Out << "#pragma clang deprecated(" << Name << ", \"" << DeprecationMsg
-  << "\")\n";
   }
 
   /// Append a \#undef line for Name.  Name should be of the form XXX
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 99f8f2944e2796..078819183afdac 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -337,12 +337,9 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   if (hasFastFMA())
 Builder.defineMacro("FP_FAST_FMA");
 
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
-  "compile-time-constant access to the wavefront size will 
"
-  "be removed in a future release");
-  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
-  "compile-time-constant access to the wavefront size will 
"
-  "be removed in a future release");
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
+  // ToDo: deprecate this macro for naming consistency.
+  Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
 }
 
diff --git a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip 
b/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
deleted file mode 100644
index aca591536a76c0..00
--- a/clang/test/Driver/hip-wavefront-size-deprecation-diagnostics.hip
+++ /dev/null
@@ -1,111 +0,0 @@
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s
-// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic 
-nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify %s
-
-// Test that deprecation warnings for the wavefront size macro are emitted 
properly.
-
-#incl

[clang] Revert "[Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros" (PR #115499)

2024-11-08 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/115499
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Target-dependent overload resolution in declarators and specifiers (PR #103031)

2024-09-23 Thread Fabian Ritter via cfe-commits



@@ -0,0 +1,703 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only 
-verify=expected,onhost %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device 
-verify=expected,ondevice %s
+
+
+// Tests to ensure that functions with host and device overloads in that are
+// called outside of function bodies and variable initializers, e.g., in
+// template arguments are resolved with respect to the declaration to which 
they
+// belong.
+
+// Opaque types used for tests:
+struct DeviceTy {};
+struct HostTy {};
+struct HostDeviceTy {};
+struct TemplateTy {};
+
+struct TrueTy { static const bool value = true; };
+struct FalseTy { static const bool value = false; };
+
+// Select one of two types based on a boolean condition.
+template  struct select_type {};
+template  struct select_type { typedef T 
type; };
+template  struct select_type { typedef F 
type; };
+
+template  struct check : public select_type { };
+
+// Check if two types are the same.
+template struct is_same : public FalseTy { };
+template struct is_same : public TrueTy { };
+
+// A static assertion that fails at compile time if the expression E does not
+// have type T.
+#define ASSERT_HAS_TYPE(E, T) static_assert(is_same::value);
+
+
+// is_on_device() is true when called in a device context and false if called 
in a host context.
+__attribute__((host)) constexpr bool is_on_device(void) { return false; }
+__attribute__((device)) constexpr bool is_on_device(void) { return true; }
+
+
+// this type depends on whether it occurs in host or device code
+#define targetdep_t select_type::type
+
+// Defines and typedefs with different values in host and device compilation.
+#ifdef __CUDA_ARCH__
+#define CurrentTarget DEVICE
+typedef DeviceTy CurrentTargetTy;
+typedef DeviceTy TemplateIfHostTy;
+#else
+#define CurrentTarget HOST
+typedef HostTy CurrentTargetTy;
+typedef TemplateTy TemplateIfHostTy;
+#endif
+
+
+
+// targetdep_t in function declarations should depend on the target of the
+// declared function.
+__attribute__((device)) targetdep_t decl_ret_early_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy)
+
+__attribute__((host)) targetdep_t decl_ret_early_host(void);
+ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy)
+
+__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void);
+ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy)
+
+// If the function target is specified too late and can therefore not be
+// considered for overload resolution in targetdep_t, warn.
+targetdep_t __attribute__((device)) decl_ret_late_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy)
+
+// No warning necessary if the ignored attribute doesn't change the result.
+targetdep_t __attribute__((host)) decl_ret_late_host(void);
+ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy)
+
+targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // 
expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy)
+
+// An odd way of writing this, but it's possible.
+__attribute__((device)) targetdep_t __attribute__((host)) 
decl_ret_early_device_late_host(void); // expected-warning {{target attribute 
has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy)
+
+
+// The same for function definitions and parameter types:
+__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+// The parameter is still after the attribute, so it needs no warning.
+targetdep_t __attribute__((device)) // expected-warning {{target attribute has 
been ignored for overload resolution}}
+ret_late_device(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+  ASSERT_HAS_TYPE(x, DeviceTy)
+  return {};
+}
+
+targetdep_t __attribute__((host, device)) // expected-warning {{target 
attribute has been ignored for overload resolution}}
+ret_late_hostdevice(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+  ASSERT_HAS_TYPE(x, CurrentTargetTy)
+  return {};
+}
+
+targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+  ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+  ASSERT_HAS_TYPE(x, HostTy)
+  return {};
+}
+
+__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning 
{{target attribute has been ignored for overload resolution}}
+ret_early_device_late_host(targetdep_t x)

[clang] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution (PR #109663)

2024-09-23 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/109663

This is a proposal for an alternative to PR #91478 that would make PRs #93546 
and #103031 unnecessary. Please let me know if this one is preferable to PRs 
#91478 and #103031.

The `__AMDGCN_WAVEFRONT_SIZE` and `__AMDGCN_WAVEFRONT_SIZE__` macros in HIP can 
only provide meaningful values during device compilation. They are currently 
usable in host code, but only contain the default value of 64, independent of 
the target device(s).

This patch checks for numeric literals in clearly identifiable host code if 
they are the result of expanding the wavefront-size macros and issues a 
diagnostic if that's the case.

The alternative PR, #91478, relied on constexpr functions with host and device 
overloads (where the host overload is marked as deprecated) to diagnose uses of 
these macros in host code. A problem with this approach is the use of the macros 
outside of function bodies, e.g., in template arguments of return types, or 
default template arguments of functions. In these cases, calls to functions 
with target overloads are resolved to the host variant during host compilation 
and to the device variant during device compilation - independently of the 
target of the function they belong to. Therefore, using the wavefront size 
macros in such cases leads to diagnostics during host compilation with #91478, 
even if they are only associated with a device function.

PR #93546 is a proposal to suppress these spurious diagnostics. PR #103031 is a 
proposal to change the behavior of target-dependent overload resolution outside 
of function bodies to use the target attributes that occur before the 
overloaded call to select the overload candidate.

In contrast to #91478, this PR will not diagnose uses of the wavefront-size 
macros outside of function bodies or initializers of global host variables.

Implements SWDEV-449015.

>From 33d853eaa12431fe4dce3a69407d4ad25173ea2f Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 23 Sep 2024 08:54:07 -0400
Subject: [PATCH] [Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in
 host code without relying on target-dependent overload resolution

The __AMDGCN_WAVEFRONT_SIZE and __AMDGCN_WAVEFRONT_SIZE__ macros in HIP can
only provide meaningful values during device compilation. They are currently
usable in host code, but only contain the default value of 64, independent of
the target device(s).

This patch checks for numeric literals in clearly identifiable host code if
they are the result of expanding the wavefront-size macros and issues a
diagnostic if that's the case.

An alternative PR, #91478, relied on constexpr functions with host and device
overloads (where the host overload is marked as deprecated) to diagnose uses of
these macros in host code. A problem with this approach is the use of the macros
outside of function bodies, e.g., in template arguments of return types, or
default template arguments of functions. In these cases, calls to functions
with target overloads are resolved to the host variant during host compilation
and to the device variant during device compilation - independently of the
target of the function they belong to. Therefore, using the wavefront size
macros in such cases leads to diagnostics during host compilation with #91478,
even if they are only associated with a device function.

PR #93546 is a proposal to suppress these spurious diagnostics. PR #103031 is a
proposal to change the behavior of target-dependent overload resolution outside
of function bodies to use the target attributes that occur before the
overloaded call to select the overload candidate.

In contrast to #91478, this PR will not diagnose uses of the wavefront-size
macros outside of function bodies or initializers of global host variables.

Implements SWDEV-449015.
---
 .../clang/Basic/DiagnosticSemaKinds.td|   2 +
 clang/include/clang/Sema/SemaCUDA.h   |   4 +
 clang/lib/Sema/SemaCUDA.cpp   |  39 +++
 clang/lib/Sema/SemaExpr.cpp   |   3 +
 .../hip-wavefront-size-host-diagnostics.hip   | 109 ++
 5 files changed, 157 insertions(+)
 create mode 100644 clang/test/Driver/hip-wavefront-size-host-diagnostics.hip

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e4e04bff8b5120..557d2803021f60 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9109,6 +9109,8 @@ def warn_offload_incompatible_redeclare : Warning<
   "new declaration is %select{__device__|__global__|__host__|__host__ 
__device__}0 function, "
   "old declaration is %select{__device__|__global__|__host__|__host__ 
__device__}1 function">,
   InGroup>, DefaultIgnore;
+def warn_ref_device_macro_on_host : Warning<
+  "device-specific macro %0 is not available in a 
%select{__device__|__global__|__host__|__host__ __device__}1

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-11-06 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

@AlexVlx @jhuber6 @arsenm  is there a dependence between this deprecation PR 
and #114481, or can we already go ahead with the deprecation of the macro via 
this PR?

https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][HIP] Deprecate the AMDGCN_WAVEFRONT_SIZE macros (PR #112849)

2024-11-08 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/112849
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][NFC][docs] Fix typo in LanguageExtensions (PR #121576)

2025-01-03 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/121576

None

>From 67b8039fcd54adc2a599fcf6aa3f62ed8b348f3c Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Fri, 3 Jan 2025 09:43:35 -0500
Subject: [PATCH] [clang][NFC][docs] Fix typo in LanguageExtensions

---
 clang/docs/LanguageExtensions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index cc5f1d4ddf4477..e020710c7aa4f5 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -3641,7 +3641,7 @@ program location should be executed. It is expected to be 
used to implement
 `_
 intrinsic.
 
-The ``__builtin_allow_runtime_check()`` can be used within constrol structures
+The ``__builtin_allow_runtime_check()`` can be used within control structures
 like ``if`` to guard expensive runtime checks. The return value is determined
 by the following compiler options and may differ per call site:
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Clang/Preprocessor: Support short circuit in directive (PR #123912)

2025-01-22 Thread Fabian Ritter via cfe-commits



@@ -23,7 +23,7 @@ template __attribute__((host, device)) int 
templatify(int x) {
 __attribute__((device)) const int GlobalConst = __AMDGCN_WAVEFRONT_SIZE__; // 
expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as 
deprecated}}
 constexpr int GlobalConstExpr = __AMDGCN_WAVEFRONT_SIZE__; // expected-warning 
{{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as deprecated}}
 
-#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) // 
expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as 
deprecated}}
+#if (__AMDGCN_WAVEFRONT_SIZE__ == 64) && defined(__HIP_DEVICE_COMPILE__) // 
expected-warning {{macro '__AMDGCN_WAVEFRONT_SIZE__' has been marked as 
deprecated}}

ritter-x2a wrote:

@AaronBallman thanks for notifying me! I think `-fcuda-is-device` isn't 
necessary; as far as I'm aware (I just tried) the second `RUN` line with 
`--offload-device-only` leads to a clang run with `__HIP_DEVICE_COMPILE__` 
defined to 1.

If we change preprocessing such that the deprecation warning isn't triggered 
due to short-circuit evaluation, I'd prefer that we leave this expression in 
the test as it was, change the `expected-warning` comment after it to an 
`ondevice-warning`, and use `-verify=expected,ondevice` in the second `RUN` 
line instead.

https://github.com/llvm/llvm-project/pull/123912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] Clang/Preprocessor: Support short circuit in directive (PR #123912)

2025-01-22 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

I think it would be more useful in practice if the deprecation warnings 
respected lazy evaluation, as the PR suggests, but it's worth noting that this 
deviates from the C Standard, which specifies that macros are replaced first, 
before short-circuit evaluation takes place.

https://github.com/llvm/llvm-project/pull/123912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][NFC][docs] Fix typo in LanguageExtensions (PR #121576)

2025-01-03 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/121576
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-13 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/125826
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-13 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/125826

>From bdee20130567c0a2f4d492f2b573f1681c5afbdf Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 5 Feb 2025 04:19:00 -0500
Subject: [PATCH] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and
 libclc

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

For SWDEV-512631 and SWDEV-512633
---
 flang/cmake/modules/AddFlangOffloadRuntime.cmake | 2 +-
 libc/docs/gpu/using.rst  | 2 +-
 libclc/CMakeLists.txt| 2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp   | 6 --
 offload/test/lit.cfg | 4 +---
 5 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake 
b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 8e4f47d18535d..f1f6eb57c5d6c 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -98,7 +98,7 @@ macro(enable_omp_offload_compilation files)
 
   set(all_amdgpu_architectures
 "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
-"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
+"gfx908;gfx90a;gfx90c;gfx942;gfx1010;gfx1030"
 "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
 "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
 "gfx1152;gfx1153"
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
index 1c1f9c9bfb0c6..f17f6287be313 100644
--- a/libc/docs/gpu/using.rst
+++ b/libc/docs/gpu/using.rst
@@ -44,7 +44,7 @@ this shouldn't be necessary.
 
   $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
   $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
-  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
+  $> clang hip.hip --offload-arch=gfx942 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
 
 This will automatically link in the needed function definitions if they were
 required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index c88ea9700d100..cddff9d0ec31a 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -211,7 +211,7 @@ set( cayman_aliases aruba )
 set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
   mullins tonga tongapro iceland carrizo fiji stoney polaris10 polaris11
   gfx602 gfx705 gfx805
-  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx940 gfx941 gfx942
+  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx942
   gfx1010 gfx1011 gfx1012 gfx1013
   gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
   gfx1100 gfx1101 gfx1102 gfx1103
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 92184ba796dbd..e83d38a14f77f 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2854,12 +2854,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, 
AMDGenericDeviceTy {
   Error checkIfAPU() {
 // TODO: replace with ROCr API once it becomes available.
 llvm::StringRef StrGfxName(ComputeUnitKind);
-IsAPU = llvm::StringSwitch(StrGfxName)
-.Case("gfx940", true)
-.Default(false);
-if (IsAPU)
-  return Plugin::success();
-
 bool MayBeAPU = llvm::StringSwitch(StrGfxName)
 .Case("gfx942", true)
 .Default(false);
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 1e265d2c30904..f017bca85dd4f 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -134,12 +134,10 @@ elif 
config.libomptarget_current_target.startswith('amdgcn'):
 # amdgpu_test_arch contains a list of AMD GPUs in the system
 # only check the first one assuming that we will run the test on it.
 if not (config.amdgpu_test_arch.startswith("gfx90a") or
-config.amdgpu_test_arch.startswith("gfx940") or
 config.amdgpu_test_arch.startswith("gfx942")):
supports_unified_shared_memory = False
 # check if AMD architecture is an APU:
-if (config.amdgpu_test_arch.startswith("gfx940") or
-(config.amdgpu_test_arch.startswith("gfx942") and
+if ((config.amdgpu_test_arch.startswith("gfx942") and
  evaluate_bool_env(config.environment['IS_APU']))):
supports_apu = True
 if supports_unified_shared_memory:

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [flang] [libc] [libclc] [llvm] [mlir] [AMDGPU] Remove FeatureForceStoreSC0SC1 (PR #126878)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/126878
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (PR #126763)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/126763
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [flang] [libc] [libclc] [llvm] [mlir] [AMDGPU][docs][NFC] Replace gfx940 with gfx942 in the gfx940 ISA doc (PR #126906)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/126906
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/125826
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Add missing gfx architectures to AddFlangOffloadRuntime.cmake (PR #125827)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/125827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-19 Thread Fabian Ritter via cfe-commits


ritter-x2a wrote:

### Merge activity

* **Feb 19, 3:48 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/125826).


https://github.com/llvm/llvm-project/pull/125826
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/125826

>From 26e2dd5585fb9891ec7b62f46ac0c694d801af28 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 5 Feb 2025 04:19:00 -0500
Subject: [PATCH] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and
 libclc

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

For SWDEV-512631 and SWDEV-512633
---
 flang/cmake/modules/AddFlangOffloadRuntime.cmake | 2 +-
 libc/docs/gpu/using.rst  | 2 +-
 libclc/CMakeLists.txt| 2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp   | 6 --
 offload/test/lit.cfg | 4 +---
 5 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake 
b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 8e4f47d18535d..f1f6eb57c5d6c 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -98,7 +98,7 @@ macro(enable_omp_offload_compilation files)
 
   set(all_amdgpu_architectures
 "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
-"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
+"gfx908;gfx90a;gfx90c;gfx942;gfx1010;gfx1030"
 "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
 "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
 "gfx1152;gfx1153"
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
index 1c1f9c9bfb0c6..f17f6287be313 100644
--- a/libc/docs/gpu/using.rst
+++ b/libc/docs/gpu/using.rst
@@ -44,7 +44,7 @@ this shouldn't be necessary.
 
   $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
   $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
-  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
+  $> clang hip.hip --offload-arch=gfx942 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
 
 This will automatically link in the needed function definitions if they were
 required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 5cefa8a264310..05a2b87a56bc4 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -211,7 +211,7 @@ set( cayman_aliases aruba )
 set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
   mullins tonga tongapro iceland carrizo fiji stoney polaris10 polaris11
   gfx602 gfx705 gfx805
-  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx940 gfx941 gfx942
+  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx942
   gfx1010 gfx1011 gfx1012 gfx1013
   gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
   gfx1100 gfx1101 gfx1102 gfx1103
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 92184ba796dbd..e83d38a14f77f 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2854,12 +2854,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, 
AMDGenericDeviceTy {
   Error checkIfAPU() {
 // TODO: replace with ROCr API once it becomes available.
 llvm::StringRef StrGfxName(ComputeUnitKind);
-IsAPU = llvm::StringSwitch(StrGfxName)
-.Case("gfx940", true)
-.Default(false);
-if (IsAPU)
-  return Plugin::success();
-
 bool MayBeAPU = llvm::StringSwitch(StrGfxName)
 .Case("gfx942", true)
 .Default(false);
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 1e265d2c30904..f017bca85dd4f 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -134,12 +134,10 @@ elif 
config.libomptarget_current_target.startswith('amdgcn'):
 # amdgpu_test_arch contains a list of AMD GPUs in the system
 # only check the first one assuming that we will run the test on it.
 if not (config.amdgpu_test_arch.startswith("gfx90a") or
-config.amdgpu_test_arch.startswith("gfx940") or
 config.amdgpu_test_arch.startswith("gfx942")):
supports_unified_shared_memory = False
 # check if AMD architecture is an APU:
-if (config.amdgpu_test_arch.startswith("gfx940") or
-(config.amdgpu_test_arch.startswith("gfx942") and
+if ((config.amdgpu_test_arch.startswith("gfx942") and
  evaluate_bool_env(config.environment['IS_APU']))):
supports_apu = True
 if supports_unified_shared_memory:

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [mlir] [AMDGPU][MLIR] Replace gfx940 and gfx941 with gfx942 in MLIR (PR #125836)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/125836
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [flang] [libc] [libclc] [llvm] [mlir] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/126762
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/126762

>From fd4aa87feb5e6fdfd6c6eefee8c31d5f97aed4fa Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 11 Feb 2025 08:52:55 -0500
Subject: [PATCH] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in
 clang

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

This PR removes all occurrences of gfx940/gfx941 from clang that can be
removed without changes in the llvm directory. The
target-invalid-cpu-note/amdgcn.c test is not included here since it
tests a list of targets that is defined in
llvm/lib/TargetParser/TargetParser.cpp.

For SWDEV-512631
---
 clang/include/clang/Basic/Cuda.h  |   2 -
 clang/lib/Basic/Cuda.cpp  |   2 -
 clang/lib/Basic/Targets/NVPTX.cpp |   2 -
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp  |   2 -
 clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu   |   2 +-
 clang/test/CodeGenOpenCL/amdgpu-features.cl   |   4 -
 .../test/CodeGenOpenCL/builtins-amdgcn-fp8.cl |   2 +-
 ...cn-gfx940.cl => builtins-amdgcn-gfx942.cl} |   2 +-
 .../builtins-amdgcn-gfx950-err.cl |   2 +-
 .../builtins-amdgcn-gws-insts.cl  |   2 +-
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 110 +-
 ...fx940.cl => builtins-fp-atomics-gfx942.cl} |  34 +++---
 clang/test/Driver/amdgpu-macros.cl|   2 -
 clang/test/Driver/amdgpu-mcpu.cl  |   4 -
 clang/test/Driver/cuda-bad-arch.cu|   2 +-
 clang/test/Driver/hip-macros.hip  |  10 +-
 .../test/Misc/target-invalid-cpu-note/nvptx.c |   2 -
 ... => builtins-amdgcn-error-gfx942-param.cl} |   2 +-
 .../builtins-amdgcn-error-gfx950.cl   |   2 +-
 ...0-err.cl => builtins-amdgcn-gfx942-err.cl} |  14 +--
 20 files changed, 91 insertions(+), 113 deletions(-)
 rename clang/test/CodeGenOpenCL/{builtins-amdgcn-gfx940.cl => 
builtins-amdgcn-gfx942.cl} (98%)
 rename clang/test/CodeGenOpenCL/{builtins-fp-atomics-gfx940.cl => 
builtins-fp-atomics-gfx942.cl} (84%)
 rename clang/test/SemaOpenCL/{builtins-amdgcn-error-gfx940-param.cl => 
builtins-amdgcn-error-gfx942-param.cl} (99%)
 rename clang/test/SemaOpenCL/{builtins-amdgcn-gfx940-err.cl => 
builtins-amdgcn-gfx942-err.cl} (81%)

diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..793cab1f4e84a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -106,8 +106,6 @@ enum class OffloadArch {
   GFX90a,
   GFX90c,
   GFX9_4_GENERIC,
-  GFX940,
-  GFX941,
   GFX942,
   GFX950,
   GFX10_1_GENERIC,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..f45fb0eca3714 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -124,8 +124,6 @@ static const OffloadArchToStringMap arch_names[] = {
 GFX(90a),  // gfx90a
 GFX(90c),  // gfx90c
 {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
-GFX(940),  // gfx940
-GFX(941),  // gfx941
 GFX(942),  // gfx942
 GFX(950),  // gfx950
 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 7d13c1f145440..547cf3dfa2be7 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -211,8 +211,6 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case OffloadArch::GFX90a:
   case OffloadArch::GFX90c:
   case OffloadArch::GFX9_4_GENERIC:
-  case OffloadArch::GFX940:
-  case OffloadArch::GFX941:
   case OffloadArch::GFX942:
   case OffloadArch::GFX950:
   case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..826ec4da8ea28 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2302,8 +2302,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const 
OMPRequiresDecl *D) {
   case OffloadArch::GFX90a:
   case OffloadArch::GFX90c:
   case OffloadArch::GFX9_4_GENERIC:
-  case OffloadArch::GFX940:
-  case OffloadArch::GFX941:
   case OffloadArch::GFX942:
   case OffloadArch::GFX950:
   case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu 
b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 47fa3967fe237..37fca614c3111 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -11,7 +11,7 @@
 // RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
 
 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
+// RUN:   -fcuda-is-device -target-cpu gfx942 -fnative-half-type \
 // RUN:   -fnative-half-arguments-and-retur

[clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/126762

>From 0f0e65aebd835a9c7df70ecd8d9e429ca523f09f Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Tue, 11 Feb 2025 08:52:55 -0500
Subject: [PATCH] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in
 clang

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

This PR removes all occurrences of gfx940/gfx941 from clang that can be
removed without changes in the llvm directory. The
target-invalid-cpu-note/amdgcn.c test is not included here since it
tests a list of targets that is defined in
llvm/lib/TargetParser/TargetParser.cpp.

For SWDEV-512631
---
 clang/include/clang/Basic/Cuda.h  |   2 -
 clang/lib/Basic/Cuda.cpp  |   2 -
 clang/lib/Basic/Targets/NVPTX.cpp |   2 -
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp  |   2 -
 clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu   |   2 +-
 clang/test/CodeGenOpenCL/amdgpu-features.cl   |   4 -
 .../test/CodeGenOpenCL/builtins-amdgcn-fp8.cl |   2 +-
 ...cn-gfx940.cl => builtins-amdgcn-gfx942.cl} |   2 +-
 .../builtins-amdgcn-gfx950-err.cl |   2 +-
 .../builtins-amdgcn-gws-insts.cl  |   2 +-
 .../CodeGenOpenCL/builtins-amdgcn-mfma.cl | 110 +-
 ...fx940.cl => builtins-fp-atomics-gfx942.cl} |  34 +++---
 clang/test/Driver/amdgpu-macros.cl|   2 -
 clang/test/Driver/amdgpu-mcpu.cl  |   4 -
 clang/test/Driver/cuda-bad-arch.cu|   2 +-
 clang/test/Driver/hip-macros.hip  |  10 +-
 .../test/Misc/target-invalid-cpu-note/nvptx.c |   2 -
 ... => builtins-amdgcn-error-gfx942-param.cl} |   2 +-
 .../builtins-amdgcn-error-gfx950.cl   |   2 +-
 ...0-err.cl => builtins-amdgcn-gfx942-err.cl} |  14 +--
 20 files changed, 91 insertions(+), 113 deletions(-)
 rename clang/test/CodeGenOpenCL/{builtins-amdgcn-gfx940.cl => 
builtins-amdgcn-gfx942.cl} (98%)
 rename clang/test/CodeGenOpenCL/{builtins-fp-atomics-gfx940.cl => 
builtins-fp-atomics-gfx942.cl} (84%)
 rename clang/test/SemaOpenCL/{builtins-amdgcn-error-gfx940-param.cl => 
builtins-amdgcn-error-gfx942-param.cl} (99%)
 rename clang/test/SemaOpenCL/{builtins-amdgcn-gfx940-err.cl => 
builtins-amdgcn-gfx942-err.cl} (81%)

diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..793cab1f4e84a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -106,8 +106,6 @@ enum class OffloadArch {
   GFX90a,
   GFX90c,
   GFX9_4_GENERIC,
-  GFX940,
-  GFX941,
   GFX942,
   GFX950,
   GFX10_1_GENERIC,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..f45fb0eca3714 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -124,8 +124,6 @@ static const OffloadArchToStringMap arch_names[] = {
 GFX(90a),  // gfx90a
 GFX(90c),  // gfx90c
 {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
-GFX(940),  // gfx940
-GFX(941),  // gfx941
 GFX(942),  // gfx942
 GFX(950),  // gfx950
 {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 7d13c1f145440..547cf3dfa2be7 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -211,8 +211,6 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case OffloadArch::GFX90a:
   case OffloadArch::GFX90c:
   case OffloadArch::GFX9_4_GENERIC:
-  case OffloadArch::GFX940:
-  case OffloadArch::GFX941:
   case OffloadArch::GFX942:
   case OffloadArch::GFX950:
   case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..826ec4da8ea28 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2302,8 +2302,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const 
OMPRequiresDecl *D) {
   case OffloadArch::GFX90a:
   case OffloadArch::GFX90c:
   case OffloadArch::GFX9_4_GENERIC:
-  case OffloadArch::GFX940:
-  case OffloadArch::GFX941:
   case OffloadArch::GFX942:
   case OffloadArch::GFX950:
   case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu 
b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 47fa3967fe237..37fca614c3111 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -11,7 +11,7 @@
 // RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
 
 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
+// RUN:   -fcuda-is-device -target-cpu gfx942 -fnative-half-type \
 // RUN:   -fnative-half-arguments-and-retur

[clang] [flang] [libc] [libclc] [llvm] [mlir] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (PR #126763)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/126763
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a closed 
https://github.com/llvm/llvm-project/pull/126762
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [flang] [libc] [libclc] [llvm] [mlir] [AMDGPU][docs] Replace gfx940 and gfx941 with gfx942 in llvm/docs (PR #126887)

2025-02-19 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a edited 
https://github.com/llvm/llvm-project/pull/126887
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[flang] [libc] [libclc] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and libclc (PR #125826)

2025-02-18 Thread Fabian Ritter via cfe-commits


https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/125826

>From 029f3b95f927a9c3e46c2660363f9555a6697425 Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Wed, 5 Feb 2025 04:19:00 -0500
Subject: [PATCH] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in offload and
 libclc

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

For SWDEV-512631 and SWDEV-512633
---
 flang/cmake/modules/AddFlangOffloadRuntime.cmake | 2 +-
 libc/docs/gpu/using.rst  | 2 +-
 libclc/CMakeLists.txt| 2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp   | 6 --
 offload/test/lit.cfg | 4 +---
 5 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake 
b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
index 8e4f47d18535d..f1f6eb57c5d6c 100644
--- a/flang/cmake/modules/AddFlangOffloadRuntime.cmake
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -98,7 +98,7 @@ macro(enable_omp_offload_compilation files)
 
   set(all_amdgpu_architectures
 "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
-"gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
+"gfx908;gfx90a;gfx90c;gfx942;gfx1010;gfx1030"
 "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
 "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
 "gfx1152;gfx1153"
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
index 1c1f9c9bfb0c6..f17f6287be313 100644
--- a/libc/docs/gpu/using.rst
+++ b/libc/docs/gpu/using.rst
@@ -44,7 +44,7 @@ this shouldn't be necessary.
 
   $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
   $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
-  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
+  $> clang hip.hip --offload-arch=gfx942 --offload-new-driver -fgpu-rdc 
-Xoffload-linker -lc
 
 This will automatically link in the needed function definitions if they were
 required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 5cefa8a264310..05a2b87a56bc4 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -211,7 +211,7 @@ set( cayman_aliases aruba )
 set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
   mullins tonga tongapro iceland carrizo fiji stoney polaris10 polaris11
   gfx602 gfx705 gfx805
-  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx940 gfx941 gfx942
+  gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx90c gfx942
   gfx1010 gfx1011 gfx1012 gfx1013
   gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036
   gfx1100 gfx1101 gfx1102 gfx1103
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 92184ba796dbd..e83d38a14f77f 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2854,12 +2854,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, 
AMDGenericDeviceTy {
   Error checkIfAPU() {
 // TODO: replace with ROCr API once it becomes available.
 llvm::StringRef StrGfxName(ComputeUnitKind);
-IsAPU = llvm::StringSwitch(StrGfxName)
-.Case("gfx940", true)
-.Default(false);
-if (IsAPU)
-  return Plugin::success();
-
 bool MayBeAPU = llvm::StringSwitch(StrGfxName)
 .Case("gfx942", true)
 .Default(false);
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 1e265d2c30904..f017bca85dd4f 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -134,12 +134,10 @@ elif 
config.libomptarget_current_target.startswith('amdgcn'):
 # amdgpu_test_arch contains a list of AMD GPUs in the system
 # only check the first one assuming that we will run the test on it.
 if not (config.amdgpu_test_arch.startswith("gfx90a") or
-config.amdgpu_test_arch.startswith("gfx940") or
 config.amdgpu_test_arch.startswith("gfx942")):
supports_unified_shared_memory = False
 # check if AMD architecture is an APU:
-if (config.amdgpu_test_arch.startswith("gfx940") or
-(config.amdgpu_test_arch.startswith("gfx942") and
+if ((config.amdgpu_test_arch.startswith("gfx942") and
  evaluate_bool_env(config.environment['IS_APU']))):
supports_apu = True
 if supports_unified_shared_memory:

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64][SelectionDAG] Add CodeGen support for scalar FEAT_CPA (PR #105669)

2025-03-28 Thread Fabian Ritter via cfe-commits



@@ -401,7 +401,7 @@ def tblockaddress: SDNode<"ISD::TargetBlockAddress",  
SDTPtrLeaf, [],
 
 def add: SDNode<"ISD::ADD"   , SDTIntBinOp   ,
 [SDNPCommutative, SDNPAssociative]>;
-def ptradd : SDNode<"ISD::ADD"   , SDTPtrAddOp, []>;
+def ptradd : SDNode<"ISD::PTRADD", SDTPtrAddOp, []>;

ritter-x2a wrote:

The [AMDGPU ptradd 
line](https://github.com/llvm/llvm-project/blob/a481452cd88acc180f82dd5631257c8954ed7812/llvm/lib/Target/AMDGPU/VOP3Instructions.td#L751)
 affects global ISel, so it shouldn't be removed. This ptradd SDNode was 
introduced as an equivalent to global ISel's G_PTR_ADD (as declared in 
[SelectionDAGCompat.td](https://github.com/llvm/llvm-project/blob/a481452cd88acc180f82dd5631257c8954ed7812/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td#L63)),
 to specify SDAG patterns that are auto-translated to global ISel patterns.
As far as I'm aware, that auto-translation works the same whether `ptradd` uses `ISD::ADD` or 
`ISD::PTRADD`, so changing it as the PR currently does is fine.

https://github.com/llvm/llvm-project/pull/105669
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

67 matches

Mail list logo