[llvm] [flang] [clang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-05 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/73944

>From 60ceda3d1025891f5037f020a2efe35108f62ca3 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Thu, 30 Nov 2023 08:06:12 -0600
Subject: [PATCH 1/2] [NFC][AMDGPU] Move address space enum to LLVM directory

Types of AMDGPU address space were defined in Clang-specific
class. In consequence this enum cannot be reused by other frontends
like Flang.

If we move address space enum to LLVM directory, then we can reuse
it in other frontends like Flang.
---
 clang/lib/Basic/Targets/AMDGPU.cpp| 80 +--
 clang/lib/Basic/Targets/AMDGPU.h  | 17 ++--
 flang/lib/Frontend/FrontendActions.cpp| 10 +--
 llvm/include/llvm/TargetParser/TargetParser.h |  9 +++
 4 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab4242..3fe9f9fa9c42d 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -37,50 +37,50 @@ static const char *const DataLayoutStringAMDGCN =
 "-ni:7:8";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
-Generic,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
-Global,   // sycl_global
-Global,   // sycl_global_device
-Global,   // sycl_global_host
-Local,// sycl_local
-Private,  // sycl_private
-Generic,  // ptr32_sptr
-Generic,  // ptr32_uptr
-Generic,  // ptr64
-Generic,  // hlsl_groupshared
+llvm::AMDGPU::Generic,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
+llvm::AMDGPU::Global,   // sycl_global
+llvm::AMDGPU::Global,   // sycl_global_device
+llvm::AMDGPU::Global,   // sycl_global_host
+llvm::AMDGPU::Local,// sycl_local
+llvm::AMDGPU::Private,  // sycl_private
+llvm::AMDGPU::Generic,  // ptr32_sptr
+llvm::AMDGPU::Generic,  // ptr32_uptr
+llvm::AMDGPU::Generic,  // ptr64
+llvm::AMDGPU::Generic,  // hlsl_groupshared
 };
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
-Private,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
+llvm::AMDGPU::Private,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
 // SYCL address space values for this map are dummy
-Generic, // sycl_global
-Generic, // sycl_global_device
-Generic, // sycl_global_host
-Generic, // sycl_local
-Generic, // sycl_private
-Generic, // ptr32_sptr
-Generic, // ptr32_uptr
-Generic, // ptr64
-Generic, // hlsl_groupshared
+llvm::AMDGPU::Generic, // sycl_global
+llvm::AMDGPU::Generic, // sycl_global_device
+llvm::AMDGPU::Generic, // sycl_global_host
+llvm::AMDGPU::Generic, // sycl_local
+llvm::AMDGPU::Generic, // sycl_private
+llvm::AMDGPU::Generic, // ptr32_sptr
+llvm::AMDGPU::Generic, // ptr32_uptr
+llvm::AMDGPU::Generic, // ptr64
+llvm::AMDGPU::Generic, // hlsl_groupshared
 
 };
 } // namespace targets
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 300d9691d8a0f..1e12f9e12af59 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -29,13 +29,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 
   static const char *const GCCRegNames[];
 
-  enum AddrSpace {
-Generic = 0,
-Global = 1,
-Local = 3,
-Constant = 4,
-Private = 5
-  };
   static const LangASMap AMDGPUDefIsGenMap;
   static const LangASMap AMDGPUDefIsPrivMap;
 
@@ -106,7 +99

[llvm] [flang] [clang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-05 Thread Dominik Adamski via cfe-commits


@@ -31,6 +31,15 @@ class Triple;
 // back-end to TableGen to create these clean tables.
 namespace AMDGPU {
 
+/// Address space values for AMD GPUs
+enum AddrSpace {
+  Generic = 0,

DominikAdamski wrote:

Done. Moved to enum class.

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [flang] [clang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-05 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

The address spaces for AMDGPU defined 
[here](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPU.h#L395-L456)
 contain more types of address spaces than the enum defined in 
Clang. Is it ok to extend the number of address space types for Clang?

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [llvm] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-06 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

@lenary Thank you for your input.
@kparzysz @lenary Shall I add all the address spaces which are mentioned in [the other 
LLVM 
header](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPU.h#L395-L456)?
 Currently I have added the address spaces which were mentioned in 
[Clang](https://github.com/llvm/llvm-project/pull/73944/files#diff-06ca78b74b85e093d5b7eddd16b24b4781b2025e6e8bf6c035345155c2649a4d).

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [llvm] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-07 Thread Dominik Adamski via cfe-commits


@@ -0,0 +1,31 @@
+//=== AMDGPUAddrSpace.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+/// \file
+/// AMDGPU address space definition
+///
+//
+//===--===//
+
+#ifndef LLVM_SUPPORT_AMDGPUADDRSPACE_H
+#define LLVM_SUPPORT_AMDGPUADDRSPACE_H
+
+namespace llvm {
+namespace AMDGPU {
+enum class AddrSpace {

DominikAdamski wrote:

@arsenm I can consolidate these enums, but I would like to be sure that I am 
allowed to do it. I was not sure why the Clang enum has a smaller range than 
the LLVM enum.
My initial aim with this patch was to perform minimal code refactoring so that 
Clang and Flang can reuse the same enum. If you wish, I can consolidate the Flang, 
Clang and LLVM enums.

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [flang] [clang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-08 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/73944

>From 60ceda3d1025891f5037f020a2efe35108f62ca3 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Thu, 30 Nov 2023 08:06:12 -0600
Subject: [PATCH 1/4] [NFC][AMDGPU] Move address space enum to LLVM directory

Types of AMDGPU address space were defined in Clang-specific
class. In consequence this enum cannot be reused by other frontends
like Flang.

If we move address space enum to LLVM directory, then we can reuse
it in other frontends like Flang.
---
 clang/lib/Basic/Targets/AMDGPU.cpp| 80 +--
 clang/lib/Basic/Targets/AMDGPU.h  | 17 ++--
 flang/lib/Frontend/FrontendActions.cpp| 10 +--
 llvm/include/llvm/TargetParser/TargetParser.h |  9 +++
 4 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab4242..3fe9f9fa9c42d 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -37,50 +37,50 @@ static const char *const DataLayoutStringAMDGCN =
 "-ni:7:8";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
-Generic,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
-Global,   // sycl_global
-Global,   // sycl_global_device
-Global,   // sycl_global_host
-Local,// sycl_local
-Private,  // sycl_private
-Generic,  // ptr32_sptr
-Generic,  // ptr32_uptr
-Generic,  // ptr64
-Generic,  // hlsl_groupshared
+llvm::AMDGPU::Generic,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
+llvm::AMDGPU::Global,   // sycl_global
+llvm::AMDGPU::Global,   // sycl_global_device
+llvm::AMDGPU::Global,   // sycl_global_host
+llvm::AMDGPU::Local,// sycl_local
+llvm::AMDGPU::Private,  // sycl_private
+llvm::AMDGPU::Generic,  // ptr32_sptr
+llvm::AMDGPU::Generic,  // ptr32_uptr
+llvm::AMDGPU::Generic,  // ptr64
+llvm::AMDGPU::Generic,  // hlsl_groupshared
 };
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
-Private,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
+llvm::AMDGPU::Private,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
 // SYCL address space values for this map are dummy
-Generic, // sycl_global
-Generic, // sycl_global_device
-Generic, // sycl_global_host
-Generic, // sycl_local
-Generic, // sycl_private
-Generic, // ptr32_sptr
-Generic, // ptr32_uptr
-Generic, // ptr64
-Generic, // hlsl_groupshared
+llvm::AMDGPU::Generic, // sycl_global
+llvm::AMDGPU::Generic, // sycl_global_device
+llvm::AMDGPU::Generic, // sycl_global_host
+llvm::AMDGPU::Generic, // sycl_local
+llvm::AMDGPU::Generic, // sycl_private
+llvm::AMDGPU::Generic, // ptr32_sptr
+llvm::AMDGPU::Generic, // ptr32_uptr
+llvm::AMDGPU::Generic, // ptr64
+llvm::AMDGPU::Generic, // hlsl_groupshared
 
 };
 } // namespace targets
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 300d9691d8a0f..1e12f9e12af59 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -29,13 +29,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 
   static const char *const GCCRegNames[];
 
-  enum AddrSpace {
-Generic = 0,
-Global = 1,
-Local = 3,
-Constant = 4,
-Private = 5
-  };
   static const LangASMap AMDGPUDefIsGenMap;
   static const LangASMap AMDGPUDefIsPrivMap;
 
@@ -106,7 +99

[llvm] [flang] [clang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-08 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/73944

>From 60ceda3d1025891f5037f020a2efe35108f62ca3 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Thu, 30 Nov 2023 08:06:12 -0600
Subject: [PATCH 1/4] [NFC][AMDGPU] Move address space enum to LLVM directory

Types of AMDGPU address space were defined in Clang-specific
class. In consequence this enum cannot be reused by other frontends
like Flang.

If we move address space enum to LLVM directory, then we can reuse
it in other frontends like Flang.
---
 clang/lib/Basic/Targets/AMDGPU.cpp| 80 +--
 clang/lib/Basic/Targets/AMDGPU.h  | 17 ++--
 flang/lib/Frontend/FrontendActions.cpp| 10 +--
 llvm/include/llvm/TargetParser/TargetParser.h |  9 +++
 4 files changed, 56 insertions(+), 60 deletions(-)

diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 409ae32ab4242..3fe9f9fa9c42d 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -37,50 +37,50 @@ static const char *const DataLayoutStringAMDGCN =
 "-ni:7:8";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
-Generic,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
-Global,   // sycl_global
-Global,   // sycl_global_device
-Global,   // sycl_global_host
-Local,// sycl_local
-Private,  // sycl_private
-Generic,  // ptr32_sptr
-Generic,  // ptr32_uptr
-Generic,  // ptr64
-Generic,  // hlsl_groupshared
+llvm::AMDGPU::Generic,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
+llvm::AMDGPU::Global,   // sycl_global
+llvm::AMDGPU::Global,   // sycl_global_device
+llvm::AMDGPU::Global,   // sycl_global_host
+llvm::AMDGPU::Local,// sycl_local
+llvm::AMDGPU::Private,  // sycl_private
+llvm::AMDGPU::Generic,  // ptr32_sptr
+llvm::AMDGPU::Generic,  // ptr32_uptr
+llvm::AMDGPU::Generic,  // ptr64
+llvm::AMDGPU::Generic,  // hlsl_groupshared
 };
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
-Private,  // Default
-Global,   // opencl_global
-Local,// opencl_local
-Constant, // opencl_constant
-Private,  // opencl_private
-Generic,  // opencl_generic
-Global,   // opencl_global_device
-Global,   // opencl_global_host
-Global,   // cuda_device
-Constant, // cuda_constant
-Local,// cuda_shared
+llvm::AMDGPU::Private,  // Default
+llvm::AMDGPU::Global,   // opencl_global
+llvm::AMDGPU::Local,// opencl_local
+llvm::AMDGPU::Constant, // opencl_constant
+llvm::AMDGPU::Private,  // opencl_private
+llvm::AMDGPU::Generic,  // opencl_generic
+llvm::AMDGPU::Global,   // opencl_global_device
+llvm::AMDGPU::Global,   // opencl_global_host
+llvm::AMDGPU::Global,   // cuda_device
+llvm::AMDGPU::Constant, // cuda_constant
+llvm::AMDGPU::Local,// cuda_shared
 // SYCL address space values for this map are dummy
-Generic, // sycl_global
-Generic, // sycl_global_device
-Generic, // sycl_global_host
-Generic, // sycl_local
-Generic, // sycl_private
-Generic, // ptr32_sptr
-Generic, // ptr32_uptr
-Generic, // ptr64
-Generic, // hlsl_groupshared
+llvm::AMDGPU::Generic, // sycl_global
+llvm::AMDGPU::Generic, // sycl_global_device
+llvm::AMDGPU::Generic, // sycl_global_host
+llvm::AMDGPU::Generic, // sycl_local
+llvm::AMDGPU::Generic, // sycl_private
+llvm::AMDGPU::Generic, // ptr32_sptr
+llvm::AMDGPU::Generic, // ptr32_uptr
+llvm::AMDGPU::Generic, // ptr64
+llvm::AMDGPU::Generic, // hlsl_groupshared
 
 };
 } // namespace targets
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 300d9691d8a0f..1e12f9e12af59 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -29,13 +29,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : 
public TargetInfo {
 
   static const char *const GCCRegNames[];
 
-  enum AddrSpace {
-Generic = 0,
-Global = 1,
-Local = 3,
-Constant = 4,
-Private = 5
-  };
   static const LangASMap AMDGPUDefIsGenMap;
   static const LangASMap AMDGPUDefIsPrivMap;
 
@@ -106,7 +99

[llvm] [clang] [flang] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-08 Thread Dominik Adamski via cfe-commits


@@ -0,0 +1,31 @@
+//=== AMDGPUAddrSpace.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+/// \file
+/// AMDGPU address space definition
+///
+//
+//===--===//
+
+#ifndef LLVM_SUPPORT_AMDGPUADDRSPACE_H
+#define LLVM_SUPPORT_AMDGPUADDRSPACE_H
+
+namespace llvm {
+namespace AMDGPU {
+enum class AddrSpace {

DominikAdamski wrote:

@arsenm Done. I unified the Clang enum with the LLVM enum. I haven't modified the 
names of the LLVM enum and I haven't introduced `enum class` instead of the 
`AMDGPUAS` namespace. Please let me know if this is ok for you.

https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[flang] [clang] [llvm] [NFC][AMDGPU] Move address space enum to LLVM directory (PR #73944)

2023-12-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/73944
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-17 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/72638

Information about the code object version can be configured by the user for the AMD GPU 
target, and it needs to be placed in the LLVM IR generated by Flang.

Information about the code object version in the MLIR generated by the parser can be 
reused by other tools. There is no need to specify extra flags if we want to 
invoke MLIR tools (like fir-opt) separately.

>From eb2710b0f736860dac62cc2ff8907fcefc64a8d6 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 17 Nov 2023 03:02:49 -0600
Subject: [PATCH] [Flang] Add code-object-version option

Information about code object version can be configured by the user
for AMD GPU target and it needs to be placed in LLVM IR generated
by Flang.

Information about code object version in MLIR generated by the
parser can be reused by other tools. There is no need to specify
extra flags if we want to invoke MLIR tools separately.
---
 clang/include/clang/Driver/Options.td |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp | 11 ++
 clang/lib/Driver/ToolChains/Flang.h   |  7 
 flang/include/flang/Frontend/CodeGenOptions.h | 13 +++
 flang/lib/Frontend/CompilerInvocation.cpp |  9 +
 flang/lib/Frontend/FrontendActions.cpp| 38 +--
 flang/test/Driver/code-object-version.f90 |  8 
 flang/test/Driver/driver-help-hidden.f90  |  2 +
 flang/test/Driver/driver-help.f90 |  4 ++
 flang/test/Lower/AMD/code_object_version.f90  | 11 ++
 10 files changed, 101 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/Driver/code-object-version.f90
 create mode 100644 flang/test/Lower/AMD/code_object_version.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 811550416110b3d..e7eb94d174e75f8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4688,7 +4688,7 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 8bdd920c3dcbb79..e60c11bfbe8e38b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -204,6 +204,14 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
   }
 }
 
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+ArgStringList &CmdArgs) const {
+  if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+StringRef Val = A->getValue();
+CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+  }
+}
+
 void Flang::addTargetOptions(const ArgList &Args,
  ArgStringList &CmdArgs) const {
   const ToolChain &TC = getToolChain();
@@ -227,6 +235,9 @@ void Flang::addTargetOptions(const ArgList &Args,
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
+getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+AddAMDGPUTargetArgs(Args, CmdArgs);
+break;
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:
 getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h 
b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac90..8d35080e1c0c88b 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
 llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for AMDGPU target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+   llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h 
b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a4a..8d938c361a0aa23 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.

[flang] [clang] [Flang] Add code-object-version option (PR #72638)

2023-11-17 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/72638

>From e5d339c24193e4e37013b3b25460009418d6ce6d Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 17 Nov 2023 03:02:49 -0600
Subject: [PATCH] [Flang] Add code-object-version option

Information about code object version can be configured by the user
for AMD GPU target and it needs to be placed in LLVM IR generated
by Flang.

Information about code object version in MLIR generated by the
parser can be reused by other tools. There is no need to specify
extra flags if we want to invoke MLIR tools separately.
---
 clang/include/clang/Driver/Options.td |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp | 11 ++
 clang/lib/Driver/ToolChains/Flang.h   |  7 
 flang/include/flang/Frontend/CodeGenOptions.h | 13 +++
 flang/lib/Frontend/CompilerInvocation.cpp |  9 +
 flang/lib/Frontend/FrontendActions.cpp| 39 +--
 flang/test/Driver/code-object-version.f90 |  8 
 flang/test/Driver/driver-help-hidden.f90  |  2 +
 flang/test/Driver/driver-help.f90 |  4 ++
 flang/test/Lower/AMD/code_object_version.f90  | 11 ++
 10 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 flang/test/Driver/code-object-version.f90
 create mode 100644 flang/test/Lower/AMD/code_object_version.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 811550416110b3d..e7eb94d174e75f8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4688,7 +4688,7 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 8bdd920c3dcbb79..e60c11bfbe8e38b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -204,6 +204,14 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
   }
 }
 
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+ArgStringList &CmdArgs) const {
+  if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+StringRef Val = A->getValue();
+CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+  }
+}
+
 void Flang::addTargetOptions(const ArgList &Args,
  ArgStringList &CmdArgs) const {
   const ToolChain &TC = getToolChain();
@@ -227,6 +235,9 @@ void Flang::addTargetOptions(const ArgList &Args,
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
+getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+AddAMDGPUTargetArgs(Args, CmdArgs);
+break;
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:
 getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h 
b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac90..8d35080e1c0c88b 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
 llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for AMDGPU target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+   llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h 
b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a4a..8d938c361a0aa23 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum class CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.
+COV_4 = 400,
+COV_5 = 500,
+  };
+
+  /// \brief Code object version for AMDGPU.
+  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+
   /// Optimization remark with an optional regular expre

[clang] [flang] [llvm] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/72638

>From e5d339c24193e4e37013b3b25460009418d6ce6d Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 17 Nov 2023 03:02:49 -0600
Subject: [PATCH 1/2] [Flang] Add code-object-version option

Information about code object version can be configured by the user
for AMD GPU target and it needs to be placed in LLVM IR generated
by Flang.

Information about code object version in MLIR generated by the
parser can be reused by other tools. There is no need to specify
extra flags if we want to invoke MLIR tools separately.
---
 clang/include/clang/Driver/Options.td |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp | 11 ++
 clang/lib/Driver/ToolChains/Flang.h   |  7 
 flang/include/flang/Frontend/CodeGenOptions.h | 13 +++
 flang/lib/Frontend/CompilerInvocation.cpp |  9 +
 flang/lib/Frontend/FrontendActions.cpp| 39 +--
 flang/test/Driver/code-object-version.f90 |  8 
 flang/test/Driver/driver-help-hidden.f90  |  2 +
 flang/test/Driver/driver-help.f90 |  4 ++
 flang/test/Lower/AMD/code_object_version.f90  | 11 ++
 10 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 flang/test/Driver/code-object-version.f90
 create mode 100644 flang/test/Lower/AMD/code_object_version.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 811550416110b3d..e7eb94d174e75f8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4688,7 +4688,7 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 8bdd920c3dcbb79..e60c11bfbe8e38b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -204,6 +204,14 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
   }
 }
 
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+ArgStringList &CmdArgs) const {
+  if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+StringRef Val = A->getValue();
+CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+  }
+}
+
 void Flang::addTargetOptions(const ArgList &Args,
  ArgStringList &CmdArgs) const {
   const ToolChain &TC = getToolChain();
@@ -227,6 +235,9 @@ void Flang::addTargetOptions(const ArgList &Args,
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
+getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+AddAMDGPUTargetArgs(Args, CmdArgs);
+break;
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:
 getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h 
b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac90..8d35080e1c0c88b 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
 llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for AMDGPU target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+   llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h 
b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a4a..8d938c361a0aa23 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum class CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.
+COV_4 = 400,
+COV_5 = 500,
+  };
+
+  /// \brief Code object version for AMDGPU.
+  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+
   /// Optimization remark with an optional regular e

[clang] [flang] [llvm] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -264,6 +263,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+   CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+  if (triple.isAMDGPU()) {
+unsigned oclcABIVERsion;
+const unsigned defaultOclcABIVERsion = 400;

DominikAdamski wrote:

Done.

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [llvm] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -264,6 +263,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+   CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+  if (triple.isAMDGPU()) {
+unsigned oclcABIVERsion;
+const unsigned defaultOclcABIVERsion = 400;
+mlir::OpBuilder builder(mlirModule.getContext());
+const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
+if (codeGenOpts.CodeObjectVersion ==
+CodeGenOptions::CodeObjectVersionKind::COV_None)
+  oclcABIVERsion = defaultOclcABIVERsion;
+else
+  oclcABIVERsion = static_cast<unsigned>(codeGenOpts.CodeObjectVersion);
+
+auto int32Type = builder.getI32Type();
+auto covInfo = builder.create<mlir::LLVM::GlobalOp>(
+mlirModule.getLoc(), int32Type, true, mlir::LLVM::Linkage::WeakODR,
+"__oclc_ABI_version",
+builder.getIntegerAttr(int32Type, oclcABIVERsion));
+covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
+covInfo.setAddrSpace(4);

DominikAdamski wrote:

This address space is described here: 
https://llvm.org/docs/AMDGPUUsage.html#address-spaces . The value 4 corresponds 
to the Constant address space. There is an enum, AddrSpace, which describes the 
address spaces. It is defined as part of Clang's TargetInfo: 
https://github.com/llvm/llvm-project/blob/main/clang/lib/Basic/Targets/AMDGPU.h 
.

I will move this enum to the LLVM directory as the next step. For now I have 
only added a TODO comment, because TargetInfo is one of the basic Clang classes 
and I would like to do the move as a separate step in case of any regression.

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [llvm] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -264,6 +263,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+   CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();

DominikAdamski wrote:

Done

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits




DominikAdamski wrote:

Done

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -264,6 +263,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+   CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+  if (triple.isAMDGPU()) {

DominikAdamski wrote:

done

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum class CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.
+COV_4 = 400,
+COV_5 = 500,
+  };

DominikAdamski wrote:

I moved it.

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -264,6 +263,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+   CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+  if (triple.isAMDGPU()) {
+unsigned oclcABIVERsion;
+const unsigned defaultOclcABIVERsion = 400;
+mlir::OpBuilder builder(mlirModule.getContext());
+const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
+if (codeGenOpts.CodeObjectVersion ==
+CodeGenOptions::CodeObjectVersionKind::COV_None)
+  oclcABIVERsion = defaultOclcABIVERsion;
+else
+  oclcABIVERsion = static_cast<unsigned>(codeGenOpts.CodeObjectVersion);
+
+auto int32Type = builder.getI32Type();
+auto covInfo = builder.create<mlir::LLVM::GlobalOp>(
+mlirModule.getLoc(), int32Type, true, mlir::LLVM::Linkage::WeakODR,
+"__oclc_ABI_version",
+builder.getIntegerAttr(int32Type, oclcABIVERsion));
+covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
+covInfo.setAddrSpace(4);
+covInfo.setVisibility_(mlir::LLVM::Visibility::Hidden);
+builder.setInsertionPointToStart(mlirModule.getBody());
+builder.insert(covInfo);
+  }
+  addDependentLibs(mlirModule, ci);

DominikAdamski wrote:

done

https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[flang] [llvm] [clang] [Flang] Add code-object-version option (PR #72638)

2023-11-22 Thread Dominik Adamski via cfe-commits


@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum class CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.

DominikAdamski wrote:

COV_None means that no global item is emitted.
COV_2 and COV_3 are unsupported.
If the user does not specify a version, then the default version COV_4 is set.
See the following document for more information about the code object version: 
https://llvm.org/docs/AMDGPUUsage.html#code-object-metadata .

BTW, is it possible to set the default version for Flang in a similar way as it 
is done for Clang? Could we use a similar macro, 
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4"> (see 
https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Driver/Options.td#L4716
 ), and replace the Clang struct TargetOpts with a Flang struct? 


https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[flang] [mlir] [clang] [Flang][OpenMP][MLIR] Add support for -nogpulib option (PR #71045)

2023-11-02 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/71045

If the -nogpulib option is passed by the user, then the OpenMP device runtime is 
not used and we should not emit globals to configure debugging at compile-time 
for the device runtime.

Link to -nogpulib flag implementation for Clang: 
https://reviews.llvm.org/D125314  

>From 684ade39bda46edd6392521cc63902486659559b Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 31 Oct 2023 08:06:59 -0500
Subject: [PATCH 1/2] [MLIR][OpenMP] Add nogpulib parameter to OpenMP
 attributes

Added an OpenMP attribute which will model nogpulib flag.

If nogpulib option is passed by the user, then the OpenMP device
runtime is not used and we should not emit globals to configure
debugging at compile-time for the device runtime.

Link to -nogpulib flag implementation for Clang:
https://reviews.llvm.org/D125314
---
 flang/include/flang/Tools/CrossToolHelpers.h  | 11 +++
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  1 +
 .../mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td|  5 +++--
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp  |  8 ++--
 mlir/test/Dialect/OpenMP/attr.mlir|  6 ++
 mlir/test/Target/LLVMIR/openmp-llvm.mlir  | 10 ++
 6 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/flang/include/flang/Tools/CrossToolHelpers.h 
b/flang/include/flang/Tools/CrossToolHelpers.h
index ddec70fa9824c52..a1f5c871992a1f8 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -53,14 +53,16 @@ struct OffloadModuleOpts {
   OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
   bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
   bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
-  bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {})
+  bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
+  bool NoGPULib = false)
   : OpenMPTargetDebug(OpenMPTargetDebug),
 OpenMPTeamSubscription(OpenMPTeamSubscription),
 OpenMPThreadSubscription(OpenMPThreadSubscription),
 OpenMPNoThreadState(OpenMPNoThreadState),
 OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
 OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
-OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {}
+OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
+NoGPULib(NoGPULib) {}
 
   OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
   : OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -70,7 +72,7 @@ struct OffloadModuleOpts {
 OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
 OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
 OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
-OMPHostIRFile(Opts.OMPHostIRFile) {}
+OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(false) {}
 
   uint32_t OpenMPTargetDebug = 0;
   bool OpenMPTeamSubscription = false;
@@ -81,6 +83,7 @@ struct OffloadModuleOpts {
   bool OpenMPIsGPU = false;
   uint32_t OpenMPVersion = 11;
   std::string OMPHostIRFile = {};
+  bool NoGPULib = false;
 };
 
 //  Shares assinging of the OpenMP OffloadModuleInterface and its assorted
@@ -95,7 +98,7 @@ void setOffloadModuleInterfaceAttributes(
 if (Opts.OpenMPIsTargetDevice) {
   offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
   Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
-  Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion);
+  Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, false);
 
   if (!Opts.OMPHostIRFile.empty())
 offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 72121ad4f9e47a5..ae2dd018059e3aa 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
 DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription,
 DefaultValuedParameter<"bool", "false">:$assume_no_thread_state,
 DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism,
+DefaultValuedParameter<"bool", "false">:$no_gpu_lib,
 DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version
   );
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 77001fc816cf91f..89d04af64766fc2 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -198,11 +198,12 @@ def OffloadModuleInterface : 
OpInterface<"OffloadModuleInterface"> {
 "bool":$assumeThreadsOversubscription,
  

[clang] 2b1948c - [NFC][OpenMP][Clang]Update OpenMP clang tests

2023-11-03 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-11-03T05:13:06-05:00
New Revision: 2b1948c2be0e935bdd2e764ae1a2b84c405fdc04

URL: 
https://github.com/llvm/llvm-project/commit/2b1948c2be0e935bdd2e764ae1a2b84c405fdc04
DIFF: 
https://github.com/llvm/llvm-project/commit/2b1948c2be0e935bdd2e764ae1a2b84c405fdc04.diff

LOG: [NFC][OpenMP][Clang]Update OpenMP clang tests

Replace hardcoded constants by regular expressions

Added: 


Modified: 
clang/test/OpenMP/cancel_codegen.cpp
clang/test/OpenMP/parallel_codegen.cpp

Removed: 




diff  --git a/clang/test/OpenMP/cancel_codegen.cpp 
b/clang/test/OpenMP/cancel_codegen.cpp
index 53580e0c2b0293f..03024cf331b2717 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1026,25 +1026,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]])
-// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
+// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias ![[NOALIAS0:[0-9]+]]
+// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
 // CHECK3-NEXT:[[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:[[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM_I]], i32 4)
 // CHECK3-NEXT:[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK3-NEXT:br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label 
[[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK3:   .cancel.exit.i:
-// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1:[0-9]+]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:   .cancel.continue.i:
-// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK3:   .omp_outlined..exit:
-// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
+// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]]
 // CHECK3-NEXT:ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/parallel_codegen.cpp 
b/clang/test/OpenMP/parallel_codegen.cpp
index 5c98761be0808ef..d545b4a9d9fa887 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -812,7 +812,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:  omp.par.entry:
 // CHECK3-NEXT:[[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr 
[[TMP0]], i32 0, i32 0
 // CHECK3-NEXT:[[LOADGEP__RELOADED:%.*]] = load ptr, ptr 
[[GEP__RELOADED]], align 8
@@ -956,7 +956,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG57:![0-9]+]] {
+// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG57:![0-9]+]] {
 // CHECK4-NEXT

[clang] 2b1948c - [NFC][OpenMP][Clang]Update OpenMP clang tests

2023-11-03 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-11-03T05:13:06-05:00
New Revision: 2b1948c2be0e935bdd2e764ae1a2b84c405fdc04

URL: 
https://github.com/llvm/llvm-project/commit/2b1948c2be0e935bdd2e764ae1a2b84c405fdc04
DIFF: 
https://github.com/llvm/llvm-project/commit/2b1948c2be0e935bdd2e764ae1a2b84c405fdc04.diff

LOG: [NFC][OpenMP][Clang]Update OpenMP clang tests

Replace hardcoded constants by regular expressions

Added: 


Modified: 
clang/test/OpenMP/cancel_codegen.cpp
clang/test/OpenMP/parallel_codegen.cpp

Removed: 




diff  --git a/clang/test/OpenMP/cancel_codegen.cpp 
b/clang/test/OpenMP/cancel_codegen.cpp
index 53580e0c2b0293f..03024cf331b2717 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1026,25 +1026,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]])
-// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
+// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias ![[NOALIAS0:[0-9]+]]
+// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
 // CHECK3-NEXT:[[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:[[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM_I]], i32 4)
 // CHECK3-NEXT:[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK3-NEXT:br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label 
[[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK3:   .cancel.exit.i:
-// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1:[0-9]+]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:   .cancel.continue.i:
-// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK3:   .omp_outlined..exit:
-// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
+// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]]
 // CHECK3-NEXT:ret i32 0
 //
 //

diff  --git a/clang/test/OpenMP/parallel_codegen.cpp 
b/clang/test/OpenMP/parallel_codegen.cpp
index 5c98761be0808ef..d545b4a9d9fa887 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -812,7 +812,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:  omp.par.entry:
 // CHECK3-NEXT:[[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr 
[[TMP0]], i32 0, i32 0
 // CHECK3-NEXT:[[LOADGEP__RELOADED:%.*]] = load ptr, ptr 
[[GEP__RELOADED]], align 8
@@ -956,7 +956,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG57:![0-9]+]] {
+// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG57:![0-9]+]] {
 // CHECK4-NEXT

[flang] [clang] [llvm] [Flang] Add code-object-version option (PR #72638)

2023-11-28 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/72638

>From 1cc1d9d3480f750980ea0a395cca3b202a606f57 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 17 Nov 2023 03:02:49 -0600
Subject: [PATCH 1/2] [Flang] Add code-object-version option

Information about code object version can be configured by the user
for AMD GPU target and it needs to be placed in LLVM IR generated
by Flang.

Information about code object version in MLIR generated by the
parser can be reused by other tools. There is no need to specify
extra flags if we want to invoke MLIR tools separately.
---
 clang/include/clang/Driver/Options.td |  2 +-
 clang/lib/Driver/ToolChains/Flang.cpp | 11 ++
 clang/lib/Driver/ToolChains/Flang.h   |  7 
 flang/include/flang/Frontend/CodeGenOptions.h | 13 +++
 flang/lib/Frontend/CompilerInvocation.cpp |  9 +
 flang/lib/Frontend/FrontendActions.cpp| 39 +--
 flang/test/Driver/code-object-version.f90 |  8 
 flang/test/Driver/driver-help-hidden.f90  |  2 +
 flang/test/Driver/driver-help.f90 |  4 ++
 flang/test/Lower/AMD/code_object_version.f90  | 11 ++
 10 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 flang/test/Driver/code-object-version.f90
 create mode 100644 flang/test/Lower/AMD/code_object_version.f90

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 9689f12fd01417b..c2c5bb6052efd57 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4721,7 +4721,7 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group<m_Group>,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
   NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 86e1c57e485685e..a6fa94defa5e217 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -277,6 +277,14 @@ static void processVSRuntimeLibrary(const ToolChain &TC, 
const ArgList &Args,
   }
 }
 
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+ArgStringList &CmdArgs) const {
+  if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+StringRef Val = A->getValue();
+CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+  }
+}
+
 void Flang::addTargetOptions(const ArgList &Args,
  ArgStringList &CmdArgs) const {
   const ToolChain &TC = getToolChain();
@@ -300,6 +308,9 @@ void Flang::addTargetOptions(const ArgList &Args,
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
+getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+AddAMDGPUTargetArgs(Args, CmdArgs);
+break;
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:
 getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h 
b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac90..8d35080e1c0c88b 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
 llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for AMDGPU target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+   llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h 
b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a4a..8d938c361a0aa23 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
 RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum class CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.
+COV_4 = 400,
+COV_5 = 500,
+  };
+
+  /// \brief Code object version for AMDGPU.
+  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+
   /// Optimization remark w

[llvm] [clang] [flang] [Flang] Add code-object-version option (PR #72638)

2023-11-28 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/72638
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] f00ffcd - Revert "[Flang] Add code-object-version option (#72638)"

2023-11-28 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-11-28T13:18:46-06:00
New Revision: f00ffcdb58d6db902a8f86b0ce83a03874d113ad

URL: 
https://github.com/llvm/llvm-project/commit/f00ffcdb58d6db902a8f86b0ce83a03874d113ad
DIFF: 
https://github.com/llvm/llvm-project/commit/f00ffcdb58d6db902a8f86b0ce83a03874d113ad.diff

LOG: Revert "[Flang] Add code-object-version option (#72638)"

This commit causes test errors on buildbots.

This reverts commit a8ac930b99d93b2a539ada7e566993d148899144.

Added: 


Modified: 
clang/include/clang/Basic/TargetOptions.h
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/Targets/AMDGPU.cpp
clang/lib/Driver/ToolChains/Flang.cpp
clang/lib/Driver/ToolChains/Flang.h
flang/include/flang/Frontend/CodeGenOptions.h
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/driver-help-hidden.f90
flang/test/Driver/driver-help.f90
llvm/include/llvm/Target/TargetOptions.h

Removed: 
flang/test/Driver/code-object-version.f90
flang/test/Lower/AMD/code-object-version.f90



diff  --git a/clang/include/clang/Basic/TargetOptions.h 
b/clang/include/clang/Basic/TargetOptions.h
index 2049f03b28893fd..ba3acd029587160 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -78,9 +78,17 @@ class TargetOptions {
   /// \brief If enabled, allow AMDGPU unsafe floating point atomics.
   bool AllowAMDGPUUnsafeFPAtomics = false;
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum CodeObjectVersionKind {
+COV_None,
+COV_2 = 200, // Unsupported.
+COV_3 = 300, // Unsupported.
+COV_4 = 400,
+COV_5 = 500,
+  };
   /// \brief Code object version for AMDGPU.
-  llvm::CodeObjectVersionKind CodeObjectVersion =
-  llvm::CodeObjectVersionKind::COV_None;
+  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
 
   /// \brief Enumeration values for AMDGPU printf lowering scheme
   enum class AMDGPUPrintfKind {

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 7dd2755350f7a56..9689f12fd01417b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4721,9 +4721,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
+  Visibility<[ClangOption, CC1Option]>,
   Values<"none,4,5">,
-  NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
+  NormalizedValuesScope<"TargetOptions">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
   MarshallingInfoEnum, "COV_4">;
 

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061d8..c83ea966fdeadc6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17588,7 +17588,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 
   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
 
-  if (Cov == CodeObjectVersionKind::COV_None) {
+  if (Cov == clang::TargetOptions::COV_None) {
 StringRef Name = "__oclc_ABI_version";
 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
 if (!ABIVersionC)
@@ -17606,7 +17606,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 
 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
 ABIVersion,
-llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
+llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5));
 
 // Indexing the implicit kernarg segment.
 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
@@ -17621,7 +17621,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
   } else {
 Value *GEP = nullptr;
-if (Cov == CodeObjectVersionKind::COV_5) {
+if (Cov == clang::TargetOptions::COV_5) {
   // Indexing the implicit kernarg segment.
   GEP = CGF.Builder.CreateConstGEP1_32(
   CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 3225c984768657a..41ff4a992f194ae 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -847,7 +847,7 @@ void CodeGenModule::Release() {
 // Emit amdgpu_code_object_version module flag, which is code object 
version
 // times 100.
 if (getTarget().getTargetOpts().CodeObjectVersion !=
-llvm::CodeObjectVersionKind::COV_None) {
+Targe

[clang] 95943d2 - [Flang] Add code-object-version option (#72638)

2023-11-29 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-11-29T03:01:01-06:00
New Revision: 95943d2fab7e6f8dcea216df2d56a0512201b467

URL: 
https://github.com/llvm/llvm-project/commit/95943d2fab7e6f8dcea216df2d56a0512201b467
DIFF: 
https://github.com/llvm/llvm-project/commit/95943d2fab7e6f8dcea216df2d56a0512201b467.diff

LOG: [Flang] Add code-object-version option (#72638)

Information about code object version can be configured by the user for
AMD GPU target and it needs to be placed in LLVM IR generated by Flang.

Information about code object version in MLIR generated by the parser
can be reused by other tools. There is no need to specify extra flags if
we want to invoke MLIR tools (like fir-opt) separately.

Changes in comparison to a8ac93:
 * added information about required targets for test
   flang/test/Driver/driver-help.f90

Added: 
flang/test/Driver/code-object-version.f90
flang/test/Lower/AMD/code-object-version.f90

Modified: 
clang/include/clang/Basic/TargetOptions.h
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/Targets/AMDGPU.cpp
clang/lib/Driver/ToolChains/Flang.cpp
clang/lib/Driver/ToolChains/Flang.h
flang/include/flang/Frontend/CodeGenOptions.h
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/driver-help-hidden.f90
flang/test/Driver/driver-help.f90
llvm/include/llvm/Target/TargetOptions.h

Removed: 




diff  --git a/clang/include/clang/Basic/TargetOptions.h 
b/clang/include/clang/Basic/TargetOptions.h
index ba3acd029587160..2049f03b28893fd 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -78,17 +78,9 @@ class TargetOptions {
   /// \brief If enabled, allow AMDGPU unsafe floating point atomics.
   bool AllowAMDGPUUnsafeFPAtomics = false;
 
-  /// \brief Enumeration value for AMDGPU code object version, which is the
-  /// code object version times 100.
-  enum CodeObjectVersionKind {
-COV_None,
-COV_2 = 200, // Unsupported.
-COV_3 = 300, // Unsupported.
-COV_4 = 400,
-COV_5 = 500,
-  };
   /// \brief Code object version for AMDGPU.
-  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+  llvm::CodeObjectVersionKind CodeObjectVersion =
+  llvm::CodeObjectVersionKind::COV_None;
 
   /// \brief Enumeration values for AMDGPU printf lowering scheme
   enum class AMDGPUPrintfKind {

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 9689f12fd01417b..7dd2755350f7a56 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4721,9 +4721,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, 
Group,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
-  NormalizedValuesScope<"TargetOptions">,
+  NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
   MarshallingInfoEnum, "COV_4">;
 

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c83ea966fdeadc6..65d9862621061d8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17588,7 +17588,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 
   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
 
-  if (Cov == clang::TargetOptions::COV_None) {
+  if (Cov == CodeObjectVersionKind::COV_None) {
 StringRef Name = "__oclc_ABI_version";
 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
 if (!ABIVersionC)
@@ -17606,7 +17606,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 
 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
 ABIVersion,
-llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5));
+llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
 
 // Indexing the implicit kernarg segment.
 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
@@ -17621,7 +17621,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, 
unsigned Index) {
 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
   } else {
 Value *GEP = nullptr;
-if (Cov == clang::TargetOptions::COV_5) {
+if (Cov == CodeObjectVersionKind::COV_5) {
   // Indexing the implicit kernarg segment.
   GEP = CGF.Builder.CreateConstGEP1_32(
   CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 41ff4a992f194ae..3225c984768657a 100644
--- a/cla

[flang] [mlir] [clang] [Flang][OpenMP][MLIR] Add support for -nogpulib option (PR #71045)

2024-01-10 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/71045
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] d90b7bf - Add support for lowering simd if clause to LLVM IR

2022-08-01 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2022-08-01T04:43:32-05:00
New Revision: d90b7bf2c53d0315a13a81904862929252bb6824

URL: 
https://github.com/llvm/llvm-project/commit/d90b7bf2c53d0315a13a81904862929252bb6824
DIFF: 
https://github.com/llvm/llvm-project/commit/d90b7bf2c53d0315a13a81904862929252bb6824.diff

LOG: Add support for lowering simd if clause to LLVM IR

Scope of changes:
  1) Added new function to generate loop versioning
  2) Added support for if clause to applySimd function
  3) Added tests which confirm that lowering is successful

If ifCond is specified, then the collapsed loop is duplicated and an if branch
is added. The duplicated loop is executed if the simd ifCond evaluates to false.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D129368

Signed-off-by: Dominik Adamski 

Added: 


Modified: 
clang/lib/CodeGen/CGStmtOpenMP.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
mlir/test/Target/LLVMIR/openmp-llvm.mlir

Removed: 




diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index aa55cdaca5dca..962620f43a393 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2646,7 +2646,9 @@ void CodeGenFunction::EmitOMPSimdDirective(const 
OMPSimdDirective &S) {
   auto *Val = cast(Len.getScalarVal());
   Simdlen = Val;
 }
-OMPBuilder.applySimd(CLI, Simdlen);
+// Add simd metadata to the collapsed loop. Do not generate
+// another loop for if clause. Support for if clause is done earlier.
+OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Simdlen);
 return;
   }
 };

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 40ca2da4c911d..5ae9baab0e5d6 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
 
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/IRBuilder.h"
@@ -467,6 +468,20 @@ class OpenMPIRBuilder {
   bool NeedsBarrier,
   Value *Chunk = nullptr);
 
+  /// Create alternative version of the loop to support if clause
+  ///
+  /// OpenMP if clause can require to generate second loop. This loop
+  /// will be executed when if clause condition is not met. createIfVersion
+  /// adds branch instruction to the copied loop if \p  ifCond is not met.
+  ///
+  /// \param Loop   Original loop which should be versioned.
+  /// \param IfCond Value which corresponds to if clause condition
+  /// \param VMap   Value to value map to define relation between
+  ///   original and copied loop values and loop blocks.
+  /// \param NamePrefix Optional name prefix for if.then if.else blocks.
+  void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
+   ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
+
 public:
   /// Modifies the canonical loop to be a workshare loop.
   ///
@@ -597,11 +612,15 @@ class OpenMPIRBuilder {
   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
  CanonicalLoopInfo **UnrolledCLI);
 
-  /// Add metadata to simd-ize a loop.
+  /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
+  /// is cloned. The metadata which prevents vectorization is added to
+  /// to the cloned loop. The cloned loop is executed when ifCond is evaluated
+  /// to false.
   ///
   /// \param LoopThe loop to simd-ize.
+  /// \param IfCond  The value which corresponds to the if clause condition.
   /// \param Simdlen The Simdlen length to apply to the simd loop.
-  void applySimd(CanonicalLoopInfo *Loop, ConstantInt *Simdlen);
+  void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, ConstantInt *Simdlen);
 
   /// Generator for '#omp flush'
   ///

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index cee4cddab5e89..736976d406438 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -2839,32 +2840,40 @@ OpenMPIRBuilder::tileLo

[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-23 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/67000

>From e801022968ea4a42632fbcf4c5ba03e67a32c7ae Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Mon, 11 Sep 2023 05:31:37 -0400
Subject: [PATCH] [OpenMP][OMPIRBuilder] Add support to omp target parallel

Added support for LLVM IR code generation which is used for handling
omp target parallel code. The call for __kmpc_parallel_51 is generated
and the parallel region is outlined to separate function.

The proper setup of kmpc_target_init mode is not included in the commit.
It is assumed that the SPMD mode for target init is properly set by other
codegen functions.
---
 clang/test/OpenMP/cancel_codegen.cpp  |  20 +-
 clang/test/OpenMP/parallel_codegen.cpp|   4 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 314 +-
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp |   1 +
 .../Frontend/OpenMPIRBuilderTest.cpp  | 139 +++-
 5 files changed, 378 insertions(+), 100 deletions(-)

diff --git a/clang/test/OpenMP/cancel_codegen.cpp 
b/clang/test/OpenMP/cancel_codegen.cpp
index 53580e0c2b0293f..03024cf331b2717 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1026,25 +1026,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]])
-// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
+// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias ![[NOALIAS0:[0-9]+]]
+// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
 // CHECK3-NEXT:[[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:[[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM_I]], i32 4)
 // CHECK3-NEXT:[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK3-NEXT:br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label 
[[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK3:   .cancel.exit.i:
-// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1:[0-9]+]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:   .cancel.continue.i:
-// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK3:   .omp_outlined..exit:
-// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
+// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]]
 // CHECK3-NEXT:ret i32 0
 //
 //
diff --git a/clang/test/OpenMP/parallel_codegen.cpp 
b/clang/test/OpenMP/parallel_codegen.cpp
index 5c98761be0808ef..d545b4a9d9fa887 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -812,7 +812,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:  omp.par.entry:
 // CHECK3-NEXT:[[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr 
[[TMP0]], i32 0, i32 0
 // CHECK3-NEXT:[[LOADGEP__RELOADED:%.*]] = load ptr, ptr 
[[GEP__RELOADED]], align 8
@@ -

[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-23 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

@shraiysh `ParallelSimpleGPU` tests only the LLVM IR for the target device. 
`kmpc_parallel_51` is the function which needs to be executed on the GPU. The 
aim of this patch is to add GPU code generation for the `omp target parallel`
pragma.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/67000

>From f1d803c4f581a9212368ac775036b97b3144a67c Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Thu, 26 Oct 2023 05:30:38 -0500
Subject: [PATCH 1/2] [NFC][clang][OpenMP] Update OpenMP clang tests

Replace hardcoded constants by regular expressions.
---
 clang/test/OpenMP/cancel_codegen.cpp   | 20 ++--
 clang/test/OpenMP/parallel_codegen.cpp |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/test/OpenMP/cancel_codegen.cpp 
b/clang/test/OpenMP/cancel_codegen.cpp
index 53580e0c2b0293f..03024cf331b2717 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1026,25 +1026,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]])
-// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
+// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias ![[NOALIAS0:[0-9]+]]
+// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
 // CHECK3-NEXT:[[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:[[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM_I]], i32 4)
 // CHECK3-NEXT:[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK3-NEXT:br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label 
[[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK3:   .cancel.exit.i:
-// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1:[0-9]+]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:   .cancel.continue.i:
-// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK3:   .omp_outlined..exit:
-// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
+// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]]
 // CHECK3-NEXT:ret i32 0
 //
 //
diff --git a/clang/test/OpenMP/parallel_codegen.cpp 
b/clang/test/OpenMP/parallel_codegen.cpp
index 5c98761be0808ef..d545b4a9d9fa887 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -812,7 +812,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:  omp.par.entry:
 // CHECK3-NEXT:[[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr 
[[TMP0]], i32 0, i32 0
 // CHECK3-NEXT:[[LOADGEP__RELOADED:%.*]] = load ptr, ptr 
[[GEP__RELOADED]], align 8
@@ -956,7 +956,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG57:![0-9]+]] {
+// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], 
ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG57:![0-9]+]] {
 // CHECK4-NEXT:  omp.par.entry:
 // CHECK4-NEXT:[[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr 
[[

[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1026,25 +1026,25 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]])
 // CHECK3-NEXT:call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]])
-// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
-// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias !14
+// CHECK3-NEXT:store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias ![[NOALIAS0:[0-9]+]]
+// CHECK3-NEXT:store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
+// CHECK3-NEXT:[[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, 
!noalias ![[NOALIAS0]]
 // CHECK3-NEXT:[[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:[[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM_I]], i32 4)
 // CHECK3-NEXT:[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK3-NEXT:br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label 
[[DOTCANCEL_CONTINUE_I:%.*]]
 // CHECK3:   .cancel.exit.i:
-// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1:[0-9]+]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:   .cancel.continue.i:
-// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!14
+// CHECK3-NEXT:store i32 0, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
![[NOALIAS1]]
 // CHECK3-NEXT:br label [[DOTOMP_OUTLINED__EXIT]]
 // CHECK3:   .omp_outlined..exit:
-// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14
+// CHECK3-NEXT:[[CLEANUP_DEST_I:%.*]] = load i32, ptr 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias ![[NOALIAS1]]

DominikAdamski wrote:

Done. These changes have been moved to a separate commit.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));
+  Builder.restoreIP(CurrentIP);
+  // Store captured vars which are used by kmpc_parallel_51
+  if (NumCapturedVars) {
+for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+  Value *V = *(CI->arg_begin() + 2 + Idx);
+  Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+  ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+  Builder.CreateStore(V, StoreAddress);
+}
+  }
+  Value *Cond = IfCondition ? Builder.CreateSExtOrTrunc(
+  IfCondition, 
Type::getInt32Ty(M.getContext()))
+: Builder.getInt32(1);
+  Value *Parallel51CallArgs[] = {
+  /* identifier*/ Ident,
+  /* global thread num*/ ThreadID,
+  /* if expression */ Cond, NumThreads ? NumThreads : Builder.getInt32(-1),
+  /* Proc bind */ Builder.getInt32(-1),
+  /* outlined function */
+  Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr), Void,
+  Args, Builder.getInt64(NumCapturedVars)};
+
+  SmallVector RealArgs;
+  RealArgs.append(std::begin(Parallel51CallArgs), 
std::end(Parallel51CallArgs));
+  FunctionCallee RTLFn =
+  OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
+
+  Builder.CreateCall(RTLFn, RealArgs);
+
+  LLVM_DEBUG(dbgs() << "With kmpc_parallel_51 placed: "
+<< *Builder.GetInsertBlock()->getParent() << "\n");
+
+  // Initialize the local TID stack location with the argument value.
+  Builder.SetInsertPoint(PrivTID);
+  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
+  PrivTIDAddr);
+
+  // Remove redundant call to the outlined function.
+  CI->eraseFromParent();
+
+  for (Instruction *I : ToBeDeleted) {
+I->eraseFromParent();
+  }
+}
+
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the host.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP host runtime function ( __kmpc_fork_call[_if])
+static void
+hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+ Function *OuterFn, Value *Ident, Value *IfCondition,
+ Instruction *PrivTID, AllocaInst *PrivTIDAddr,
+ const SmallVector &ToBeDeleted) {
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  FunctionCallee RTLFn;
+  if (IfCondition) {
+RTLFn =
+
OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
+  } else {
+RTLFn =
+OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
+  }
+  if (auto *F = dyn_cast(RTLFn.getCallee

[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));

DominikAdamski wrote:

Done.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));
+  Builder.restoreIP(CurrentIP);
+  // Store captured vars which are used by kmpc_parallel_51
+  if (NumCapturedVars) {

DominikAdamski wrote:

Done

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));
+  Builder.restoreIP(CurrentIP);
+  // Store captured vars which are used by kmpc_parallel_51
+  if (NumCapturedVars) {
+for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+  Value *V = *(CI->arg_begin() + 2 + Idx);
+  Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+  ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+  Builder.CreateStore(V, StoreAddress);
+}
+  }
+  Value *Cond = IfCondition ? Builder.CreateSExtOrTrunc(
+  IfCondition, 
Type::getInt32Ty(M.getContext()))
+: Builder.getInt32(1);
+  Value *Parallel51CallArgs[] = {
+  /* identifier*/ Ident,
+  /* global thread num*/ ThreadID,
+  /* if expression */ Cond, NumThreads ? NumThreads : Builder.getInt32(-1),

DominikAdamski wrote:

Done -> added comments.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);

DominikAdamski wrote:

Done. New name:  `Value *NullPtrValue`

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));
+  Builder.restoreIP(CurrentIP);
+  // Store captured vars which are used by kmpc_parallel_51
+  if (NumCapturedVars) {
+for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+  Value *V = *(CI->arg_begin() + 2 + Idx);
+  Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+  ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+  Builder.CreateStore(V, StoreAddress);
+}
+  }
+  Value *Cond = IfCondition ? Builder.CreateSExtOrTrunc(
+  IfCondition, 
Type::getInt32Ty(M.getContext()))
+: Builder.getInt32(1);
+  Value *Parallel51CallArgs[] = {
+  /* identifier*/ Ident,
+  /* global thread num*/ ThreadID,
+  /* if expression */ Cond, NumThreads ? NumThreads : Builder.getInt32(-1),
+  /* Proc bind */ Builder.getInt32(-1),
+  /* outlined function */
+  Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr), Void,
+  Args, Builder.getInt64(NumCapturedVars)};
+
+  SmallVector RealArgs;
+  RealArgs.append(std::begin(Parallel51CallArgs), 
std::end(Parallel51CallArgs));

DominikAdamski wrote:

Done. Removed vector.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][OMPIRBuilder] Add support to omp target parallel (PR #67000)

2023-10-26 Thread Dominik Adamski via cfe-commits


@@ -1126,6 +1133,185 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value 
*CancelFlag,
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
 }
 
+// Callback used to create OpenMP runtime calls to support
+// omp parallel clause for the device.
+// We need to use this callback to replace call to the OutlinedFn in OuterFn
+// by the call to the OpenMP DeviceRTL runtime function (kmpc_parallel_51)
+static void
+targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
+   Function *OuterFn, Value *Ident, Value *IfCondition,
+   Value *NumThreads, Instruction *PrivTID,
+   AllocaInst *PrivTIDAddr, Value *ThreadID,
+   const SmallVector &ToBeDeleted) {
+  // Add some known attributes.
+  Module &M = OMPIRBuilder->M;
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
+  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
+  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
+  OutlinedFn.addFnAttr(Attribute::NoUnwind);
+
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
+  CallInst *CI = cast(OutlinedFn.user_back());
+  assert(CI && "Expected call instruction to outlined function");
+  CI->getParent()->setName("omp_parallel");
+  // Replace direct call to the outlined function by the call to
+  // __kmpc_parallel_51
+  Builder.SetInsertPoint(CI);
+
+  // Build call __kmpc_parallel_51
+  auto PtrTy = Type::getInt8PtrTy(M.getContext());
+  Value *Void = ConstantPointerNull::get(PtrTy);
+  // Add alloca for kernel args. Put this instruction at the beginning
+  // of the function.
+  OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
+  Builder.SetInsertPoint(&OuterFn->front(),
+ OuterFn->front().getFirstInsertionPt());
+  AllocaInst *ArgsAlloca =
+  Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
+  Value *Args =
+  Builder.CreatePointerCast(ArgsAlloca, 
Type::getInt8PtrTy(M.getContext()));
+  Builder.restoreIP(CurrentIP);
+  // Store captured vars which are used by kmpc_parallel_51
+  if (NumCapturedVars) {
+for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
+  Value *V = *(CI->arg_begin() + 2 + Idx);
+  Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
+  ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
+  Builder.CreateStore(V, StoreAddress);
+}
+  }
+  Value *Cond = IfCondition ? Builder.CreateSExtOrTrunc(
+  IfCondition, 
Type::getInt32Ty(M.getContext()))
+: Builder.getInt32(1);
+  Value *Parallel51CallArgs[] = {
+  /* identifier*/ Ident,
+  /* global thread num*/ ThreadID,
+  /* if expression */ Cond, NumThreads ? NumThreads : Builder.getInt32(-1),
+  /* Proc bind */ Builder.getInt32(-1),
+  /* outlined function */
+  Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr), Void,
+  Args, Builder.getInt64(NumCapturedVars)};
+
+  SmallVector RealArgs;
+  RealArgs.append(std::begin(Parallel51CallArgs), 
std::end(Parallel51CallArgs));
+  FunctionCallee RTLFn =
+  OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
+
+  Builder.CreateCall(RTLFn, RealArgs);
+
+  LLVM_DEBUG(dbgs() << "With kmpc_parallel_51 placed: "
+<< *Builder.GetInsertBlock()->getParent() << "\n");
+
+  // Initialize the local TID stack location with the argument value.
+  Builder.SetInsertPoint(PrivTID);
+  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
+  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),

DominikAdamski wrote:

Done. I used OMPIRBuilder types in my PR.

https://github.com/llvm/llvm-project/pull/67000
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 6842d35 - [OpenMP][OMPIRBuilder] Add support for order(concurrent) to OMPIRBuilder for SIMD directive

2022-10-04 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2022-10-04T08:30:00-05:00
New Revision: 6842d35012668d5dc3846fcbde136326e6e09bb3

URL: 
https://github.com/llvm/llvm-project/commit/6842d35012668d5dc3846fcbde136326e6e09bb3
DIFF: 
https://github.com/llvm/llvm-project/commit/6842d35012668d5dc3846fcbde136326e6e09bb3.diff

LOG: [OpenMP][OMPIRBuilder] Add support for order(concurrent) to OMPIRBuilder 
for SIMD directive

If 'order(concurrent)' clause is specified, then the iterations of SIMD loop
can be executed concurrently.

This patch adds support for LLVM IR codegen via OMPIRBuilder for SIMD loop
with 'order(concurrent)' clause. The functionality added to OMPIRBuilder is
similar to the functionality implemented in 'CodeGenFunction::EmitOMPSimdInit'.

Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D134046

Signed-off-by: Dominik Adamski 

Added: 
clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp

Modified: 
clang/lib/CodeGen/CGStmtOpenMP.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 1ffee9b94e734..d27e2c32c539a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2600,8 +2600,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const 
OMPLoopDirective &S,
 static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
   // Check for unsupported clauses
   for (OMPClause *C : S.clauses()) {
-// Currently only simdlen and safelen clauses are supported
-if (!(isa(C) || isa(C)))
+// Currently only order, simdlen and safelen clauses are supported
+if (!(isa(C) || isa(C) ||
+  isa(C)))
   return false;
   }
 
@@ -2660,9 +2661,15 @@ void CodeGenFunction::EmitOMPSimdDirective(const 
OMPSimdDirective &S) {
   auto *Val = cast(Len.getScalarVal());
   Safelen = Val;
 }
+llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
+if (const auto *C = S.getSingleClause()) {
+  if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
+Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
+  }
+}
 // Add simd metadata to the collapsed loop. Do not generate
 // another loop for if clause. Support for if clause is done earlier.
-OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Simdlen, Safelen);
+OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen);
 return;
   }
 };

diff  --git a/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp 
b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp
new file mode 100644
index 0..35a8d9b60a2fd
--- /dev/null
+++ b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp
@@ -0,0 +1,139 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify 
-fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm 
%s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+  int a, b;
+};
+
+struct P {
+  int a, b;
+};
+
+// CHECK-LABEL: @_Z6simplePfS_Pi(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:[[B_ADDR:%.*]] = alloca float*, align 8
+// CHECK-NEXT:[[C_ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:[[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+// CHECK-NEXT:[[P:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT:[[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4
+// CHECK-NEXT:[[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK-NEXT:[[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
+// CHECK-NEXT:[[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[J:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
+// CHECK-NEXT:[[AGG_CAPTURED9:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
+// CHECK-NEXT:[[DOTCOUNT_ADDR10:%.*]] = alloca i32, align 4
+// CHECK-NEXT:store float* [[A:%.*]], float** [[A_ADDR]], align 8
+// CHECK-NEXT:store float* [[B:%.*]], float** [[B_ADDR]], align 8
+// CHECK-NEXT:store i32* [[C:%.*]], i32** [[C_ADDR]], align 8
+// CHECK-NEXT:store i32 3, i32* [[I]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
%struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT:store i32* [[I]], i32** [[TMP0]], align 8
+// CHECK-NEXT:[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], 
%struct.anon.0* [[AGG_CAPTURED1

[clang] ccd314d - [OpenMP][OMPIRBuilder] Add generation of SIMD align assumptions to OMPIRBuilder

2022-10-18 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2022-10-18T02:04:18-05:00
New Revision: ccd314d3209a192fc17ad621cf7fe3f09f7c7b9f

URL: 
https://github.com/llvm/llvm-project/commit/ccd314d3209a192fc17ad621cf7fe3f09f7c7b9f
DIFF: 
https://github.com/llvm/llvm-project/commit/ccd314d3209a192fc17ad621cf7fe3f09f7c7b9f.diff

LOG: [OpenMP][OMPIRBuilder] Add generation of SIMD align assumptions to 
OMPIRBuilder

Currently generation of align assumptions for OpenMP simd construct is done
outside OMPIRBuilder for C code and it is not supported for Fortran.

According to OpenMP 5.0 standard (2.9.3) only pointers and arrays can be
aligned for C code.

If given aligned variable is pointer, then Clang generates the following set
of the LLVM IR instructions to support simd align clause:

; memory allocation for pointer address:
%A.addr = alloca ptr, align 8
; some LLVM IR code
; Alignment instructions (alignment is equal to 32):
%0 = load ptr, ptr %A.addr, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 32) ]

If given aligned variable is array, then Clang generates the following set
of the LLVM IR instructions to support simd align clause:

; memory allocation for array:
%B = alloca [10 x i32], align 16
; some LLVM IR code
; Alignment instructions (alignment is equal to 32):
%arraydecay = getelementptr inbounds [10 x i32], ptr %B, i64 0, i64 0
call void @llvm.assume(i1 true) [ "align"(ptr %arraydecay, i64 32) ]

OMPIRBuilder was modified to generate aligned assumptions. It generates only
llvm.assume calls. Frontend is responsible for generation of aligned pointer
and getting the default alignment value if user does not specify it in aligned
clause.

Unit and regression tests were added to check if aligned clause was handled 
correctly.

Differential Revision: https://reviews.llvm.org/D133578

Reviewed By: jdoerfert

Added: 
clang/test/OpenMP/irbuilder_simd_aligned.cpp

Modified: 
clang/lib/CodeGen/CGStmtOpenMP.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d27e2c32c539a..40d84d754f9d3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2602,7 +2602,7 @@ static bool isSupportedByOpenMPIRBuilder(const 
OMPSimdDirective &S) {
   for (OMPClause *C : S.clauses()) {
 // Currently only order, simdlen and safelen clauses are supported
 if (!(isa(C) || isa(C) ||
-  isa(C)))
+  isa(C) || isa(C)))
   return false;
   }
 
@@ -2628,6 +2628,36 @@ static bool isSupportedByOpenMPIRBuilder(const 
OMPSimdDirective &S) {
   }
   return true;
 }
+static llvm::MapVector
+GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+  llvm::MapVector AlignedVars;
+  for (const auto *Clause : S.getClausesOfKind()) {
+llvm::APInt ClauseAlignment(64, 0);
+if (const Expr *AlignmentExpr = Clause->getAlignment()) {
+  auto *AlignmentCI =
+  cast(CGF.EmitScalarExpr(AlignmentExpr));
+  ClauseAlignment = AlignmentCI->getValue();
+}
+for (const Expr *E : Clause->varlists()) {
+  llvm::APInt Alignment(ClauseAlignment);
+  if (Alignment == 0) {
+// OpenMP [2.8.1, Description]
+// If no optional parameter is specified, implementation-defined 
default
+// alignments for SIMD instructions on the target platforms are 
assumed.
+Alignment =
+CGF.getContext()
+
.toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
+E->getType()->getPointeeType()))
+.getQuantity();
+  }
+  assert((Alignment == 0 || Alignment.isPowerOf2()) &&
+ "alignment is not power of 2");
+  llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
+  AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
+}
+  }
+  return AlignedVars;
+}
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
   bool UseOMPIRBuilder =
@@ -2637,6 +2667,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const 
OMPSimdDirective &S) {
   PrePostActionTy &) {
   // Use the OpenMPIRBuilder if enabled.
   if (UseOMPIRBuilder) {
+llvm::MapVector AlignedVars =
+GetAlignedMapping(S, CGF);
 // Emit the associated statement and get its loop representation.
 const Stmt *Inner = S.getRawStmt();
 llvm::CanonicalLoopInfo *CLI =
@@ -2669,7 +2701,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const 
OMPSimdDirective &S) {
 }
 // Add simd metadata to the collapsed loop. Do not generate
 // another loop for if clause. Support for if clause is done earlier

[clang] ca44603 - [OpenMP][OMPIRBuilder]Move SIMD alignment calculation to LLVM Frontend

2023-01-26 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-01-26T15:10:19-06:00
New Revision: ca446037af019d1aa01b1352a30a18df33038359

URL: 
https://github.com/llvm/llvm-project/commit/ca446037af019d1aa01b1352a30a18df33038359
DIFF: 
https://github.com/llvm/llvm-project/commit/ca446037af019d1aa01b1352a30a18df33038359.diff

LOG: [OpenMP][OMPIRBuilder]Move SIMD alignment calculation to LLVM Frontend

Currently default simd alignment is defined by Clang specific TargetInfo class.
This class cannot be reused for LLVM Flang. That's why default simd alignment
calculation has been moved to OMPIRBuilder which is common for Flang and Clang.

Previous attempt: https://reviews.llvm.org/D138496 was wrong because
the default alignment depended on the number of built LLVM targets.

If we wanted to calculate the default alignment for PPC and we hadn't specified
PPC LLVM target to build, then we would get 0 as the alignment because
OMPIRBuilder couldn't create PPCTargetMachine object and it returned 0 as
the default value.

If PPC LLVM target had been built earlier, then OMPIRBuilder could have created
PPCTargetMachine object and it would have returned 128.

Differential Revision: https://reviews.llvm.org/D141910

Reviewed By: jdoerfert

Added: 


Modified: 
clang/include/clang/Basic/TargetInfo.h
clang/lib/AST/ASTContext.cpp
clang/lib/Basic/TargetInfo.cpp
clang/lib/Basic/Targets/PPC.h
clang/lib/Basic/Targets/WebAssembly.h
clang/lib/Basic/Targets/X86.cpp
lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index bb2a453f46573..b30368aa822f5 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -225,7 +225,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   bool HasStrictFP;
 
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
-  unsigned short SimdDefaultAlign;
   std::string DataLayoutString;
   const char *UserLabelPrefix;
   const char *MCountName;
@@ -794,10 +793,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
 
   /// Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
-  /// Return default simd alignment for the given target. Generally, this
-  /// value is type-specific, but this alignment can be used for most of the
-  /// types for the given target.
-  unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; }
 
   unsigned getMaxOpenCLWorkGroupSize() const { return MaxOpenCLWorkGroupSize; }
 

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index bdbf1891190e4..84434ced242d3 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -78,6 +78,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
@@ -2542,7 +2543,8 @@ unsigned ASTContext::getTypeUnadjustedAlign(const Type 
*T) const {
 }
 
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
-  unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
+  unsigned SimdAlign = llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
+  getTargetInfo().getTriple(), Target->getTargetOpts().FeatureMap);
   return SimdAlign;
 }
 

diff  --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 8ee43261fc1d3..fa5e568d599d0 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -119,7 +119,6 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
   MaxTLSAlign = 0;
-  SimdDefaultAlign = 0;
   SizeType = UnsignedLong;
   PtrDiffType = SignedLong;
   IntMaxType = SignedLongLong;

diff  --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index cc185fdadfcbc..4c02183feb4c1 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -87,7 +87,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public 
TargetInfo {
   PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
   : TargetInfo(Triple) {
 SuitableAlign = 128;
-SimdDefaultAlign = 128;
 LongDoubleWidth = LongDoubleAlign = 128;
 LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble();
 HasStrictFP = true;

diff  --git a/clang/lib/Basic/Targets/WebAssembly.h 
b/clang/lib/Basic/Targets/WebAssembly.h
index 1e73450fdd0c3..1f0bb08665347 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -49,7 +49,6 @@ class LLVM_LIBRARY_VISIBILITY WebA

[clang] ed01de6 - [OpenMP][OMPIRBuilder] Move SIMD alignment calculation to LLVM Frontend

2023-01-13 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-01-13T14:07:29-06:00
New Revision: ed01de67433174d3157e9d239d59dd465d52c6a5

URL: 
https://github.com/llvm/llvm-project/commit/ed01de67433174d3157e9d239d59dd465d52c6a5
DIFF: 
https://github.com/llvm/llvm-project/commit/ed01de67433174d3157e9d239d59dd465d52c6a5.diff

LOG: [OpenMP][OMPIRBuilder] Move SIMD alignment calculation to LLVM Frontend

Currently the default SIMD alignment is specified by the Clang-specific
TargetInfo class. This class cannot be reused by LLVM Flang. If we move the
default alignment field into the TargetMachine class, then we can create
TargetMachine objects and query them to find the SIMD alignment.

Scope of changes:
  1) Added information about maximal allowed SIMD alignment to TargetMachine
 classes.
  2) Removed getSimdDefaultAlign function from Clang TargetInfo class.
  3) Refactored createTargetMachine function.

Reviewed By: jsjodin

Differential Revision: https://reviews.llvm.org/D138496

Added: 


Modified: 
clang/include/clang/Basic/TargetInfo.h
clang/lib/AST/ASTContext.cpp
clang/lib/Basic/TargetInfo.cpp
clang/lib/Basic/Targets/PPC.h
clang/lib/Basic/Targets/WebAssembly.h
clang/lib/Basic/Targets/X86.cpp
lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/include/llvm/Target/TargetMachine.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
llvm/lib/Target/X86/X86TargetMachine.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index a5aea33d84751..def708daac8d2 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -226,7 +226,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   bool HasStrictFP;
 
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
-  unsigned short SimdDefaultAlign;
   std::string DataLayoutString;
   const char *UserLabelPrefix;
   const char *MCountName;
@@ -795,10 +794,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
 
   /// Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
-  /// Return default simd alignment for the given target. Generally, this
-  /// value is type-specific, but this alignment can be used for most of the
-  /// types for the given target.
-  unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; }
 
   unsigned getMaxOpenCLWorkGroupSize() const { return MaxOpenCLWorkGroupSize; }
 

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 15a43807c3603..6b97407236ca5 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -79,6 +79,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
@@ -93,6 +94,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2463,7 +2465,16 @@ unsigned ASTContext::getTypeUnadjustedAlign(const Type 
*T) const {
 }
 
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
-  unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
+  const std::vector &TargetFeatures =
+  Target->getTargetOpts().Features;
+  std::string TargetFeaturesString = std::accumulate(
+  TargetFeatures.cbegin(), TargetFeatures.cend(), std::string(),
+  [](const std::string &s1, const std::string &s2) {
+return s1.empty() ? s2 : s1 + "," + s2;
+  });
+  unsigned SimdAlign = llvm::OpenMPIRBuilder ::getSimdDefaultAlignment(
+  getTargetInfo().getTriple().str(), Target->getTargetOpts().CPU,
+  TargetFeaturesString);
   return SimdAlign;
 }
 

diff  --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 8ee43261fc1d3..fa5e568d599d0 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -119,7 +119,6 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
   MaxTLSAlign = 0;
-  SimdDefaultAlign = 0;
   SizeType = UnsignedLong;
   PtrDiffType = SignedLong;
   IntMaxType = SignedLongLong;

diff  --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index cc185fdadfcbc..4c02183feb4c1 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -87,7 +87,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public 
TargetInfo {
   PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
   : TargetInfo(Triple) {
 SuitableAlign = 128;
-SimdDefaultAlign = 128;
 LongDoubleWidth =

[clang] 6809af1 - Revert "[OpenMP][OMPIRBuilder] Move SIMD alignment calculation to LLVM Frontend"

2023-01-13 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-01-13T14:38:17-06:00
New Revision: 6809af1a232bc5ac71358e4b874759ddaae056a1

URL: 
https://github.com/llvm/llvm-project/commit/6809af1a232bc5ac71358e4b874759ddaae056a1
DIFF: 
https://github.com/llvm/llvm-project/commit/6809af1a232bc5ac71358e4b874759ddaae056a1.diff

LOG: Revert "[OpenMP][OMPIRBuilder] Move SIMD alignment calculation to LLVM 
Frontend"

This reverts commit ed01de67433174d3157e9d239d59dd465d52c6a5.

Added: 


Modified: 
clang/include/clang/Basic/TargetInfo.h
clang/lib/AST/ASTContext.cpp
clang/lib/Basic/TargetInfo.cpp
clang/lib/Basic/Targets/PPC.h
clang/lib/Basic/Targets/WebAssembly.h
clang/lib/Basic/Targets/X86.cpp
lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/include/llvm/Target/TargetMachine.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
llvm/lib/Target/X86/X86TargetMachine.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index def708daac8d2..a5aea33d84751 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -226,6 +226,7 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   bool HasStrictFP;
 
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
+  unsigned short SimdDefaultAlign;
   std::string DataLayoutString;
   const char *UserLabelPrefix;
   const char *MCountName;
@@ -794,6 +795,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
 
   /// Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
+  /// Return default simd alignment for the given target. Generally, this
+  /// value is type-specific, but this alignment can be used for most of the
+  /// types for the given target.
+  unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; }
 
   unsigned getMaxOpenCLWorkGroupSize() const { return MaxOpenCLWorkGroupSize; }
 

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 6b97407236ca5..15a43807c3603 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -79,7 +79,6 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
@@ -94,7 +93,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -2465,16 +2463,7 @@ unsigned ASTContext::getTypeUnadjustedAlign(const Type 
*T) const {
 }
 
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
-  const std::vector &TargetFeatures =
-  Target->getTargetOpts().Features;
-  std::string TargetFeaturesString = std::accumulate(
-  TargetFeatures.cbegin(), TargetFeatures.cend(), std::string(),
-  [](const std::string &s1, const std::string &s2) {
-return s1.empty() ? s2 : s1 + "," + s2;
-  });
-  unsigned SimdAlign = llvm::OpenMPIRBuilder ::getSimdDefaultAlignment(
-  getTargetInfo().getTriple().str(), Target->getTargetOpts().CPU,
-  TargetFeaturesString);
+  unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
   return SimdAlign;
 }
 

diff  --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index fa5e568d599d0..8ee43261fc1d3 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -119,6 +119,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
   MaxTLSAlign = 0;
+  SimdDefaultAlign = 0;
   SizeType = UnsignedLong;
   PtrDiffType = SignedLong;
   IntMaxType = SignedLongLong;

diff  --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 4c02183feb4c1..cc185fdadfcbc 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -87,6 +87,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public 
TargetInfo {
   PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
   : TargetInfo(Triple) {
 SuitableAlign = 128;
+SimdDefaultAlign = 128;
 LongDoubleWidth = LongDoubleAlign = 128;
 LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble();
 HasStrictFP = true;

diff  --git a/clang/lib/Basic/Targets/WebAssembly.h 
b/clang/lib/Basic/Targets/WebAssembly.h
index 1f0bb08665347..1e73450fdd0c3 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -49,6 +49,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public 
TargetInfo {
 SuitableAlign = 128;
 LargeArrayMinWidth = 128;
 LargeArrayAlign =

[clang] e43247d - [Clang][Flang][AMDGPU] Add support for AMDGPU to Flang driver

2023-03-29 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-03-29T02:23:37-05:00
New Revision: e43247dd329cabf7eb4dd4323b3422d3774e57a7

URL: 
https://github.com/llvm/llvm-project/commit/e43247dd329cabf7eb4dd4323b3422d3774e57a7
DIFF: 
https://github.com/llvm/llvm-project/commit/e43247dd329cabf7eb4dd4323b3422d3774e57a7.diff

LOG: [Clang][Flang][AMDGPU] Add support for AMDGPU to Flang driver

Scope of changes:
  1) Extract common code between Clang and Flang for parsing AMDGPU features
  2) Add function which adds implicit target features for AMDGPU as Clang does
  3) Add AMDGPU target as one of valid targets for Flang

Differential Revision: https://reviews.llvm.org/D145579

Reviewed By: yaxunl, awarzynski

Added: 


Modified: 
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/Driver/ToolChains/CommonArgs.cpp
clang/lib/Driver/ToolChains/Flang.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/target-cpu-features-invalid.f90
flang/test/Driver/target-cpu-features.f90
llvm/include/llvm/TargetParser/TargetParser.h
llvm/lib/TargetParser/TargetParser.cpp

Removed: 




diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index 9b3a0b0f40edb..5b99755c21e2b 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -179,197 +179,19 @@ ArrayRef 
AMDGPUTargetInfo::getGCCRegNames() const {
 bool AMDGPUTargetInfo::initFeatureMap(
 llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU,
 const std::vector &FeatureVec) const {
-  const bool IsNullCPU = CPU.empty();
-  bool IsWave32Capable = false;
 
   using namespace llvm::AMDGPU;
-
-  // XXX - What does the member GPU mean if device name string passed here?
-  if (isAMDGCN(getTriple())) {
-switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
-case GK_GFX1103:
-case GK_GFX1102:
-case GK_GFX1101:
-case GK_GFX1100:
-  IsWave32Capable = true;
-  Features["ci-insts"] = true;
-  Features["dot5-insts"] = true;
-  Features["dot7-insts"] = true;
-  Features["dot8-insts"] = true;
-  Features["dot9-insts"] = true;
-  Features["dot10-insts"] = true;
-  Features["dl-insts"] = true;
-  Features["16-bit-insts"] = true;
-  Features["dpp"] = true;
-  Features["gfx8-insts"] = true;
-  Features["gfx9-insts"] = true;
-  Features["gfx10-insts"] = true;
-  Features["gfx10-3-insts"] = true;
-  Features["gfx11-insts"] = true;
-  Features["atomic-fadd-rtn-insts"] = true;
-  break;
-case GK_GFX1036:
-case GK_GFX1035:
-case GK_GFX1034:
-case GK_GFX1033:
-case GK_GFX1032:
-case GK_GFX1031:
-case GK_GFX1030:
-  IsWave32Capable = true;
-  Features["ci-insts"] = true;
-  Features["dot1-insts"] = true;
-  Features["dot2-insts"] = true;
-  Features["dot5-insts"] = true;
-  Features["dot6-insts"] = true;
-  Features["dot7-insts"] = true;
-  Features["dot10-insts"] = true;
-  Features["dl-insts"] = true;
-  Features["16-bit-insts"] = true;
-  Features["dpp"] = true;
-  Features["gfx8-insts"] = true;
-  Features["gfx9-insts"] = true;
-  Features["gfx10-insts"] = true;
-  Features["gfx10-3-insts"] = true;
-  Features["s-memrealtime"] = true;
-  Features["s-memtime-inst"] = true;
-  break;
-case GK_GFX1012:
-case GK_GFX1011:
-  Features["dot1-insts"] = true;
-  Features["dot2-insts"] = true;
-  Features["dot5-insts"] = true;
-  Features["dot6-insts"] = true;
-  Features["dot7-insts"] = true;
-  Features["dot10-insts"] = true;
-  [[fallthrough]];
-case GK_GFX1013:
-case GK_GFX1010:
-  IsWave32Capable = true;
-  Features["dl-insts"] = true;
-  Features["ci-insts"] = true;
-  Features["16-bit-insts"] = true;
-  Features["dpp"] = true;
-  Features["gfx8-insts"] = true;
-  Features["gfx9-insts"] = true;
-  Features["gfx10-insts"] = true;
-  Features["s-memrealtime"] = true;
-  Features["s-memtime-inst"] = true;
-  break;
-case GK_GFX940:
-  Features["gfx940-insts"] = true;
-  Features["fp8-insts"] = true;
-  Features["atomic-ds-pk-add-16-insts"] = true;
-  Features["atomic-flat-pk-add-16-insts"] = true;
-  Features["atomic-global-pk-add-bf16-inst"] = true;
-  [[fallthrough]];
-case GK_GFX90A:
-  Features["gfx90a-insts"] = true;
-  Features["atomic-buffer-global-pk-add-f16-insts"] = true;
-  Features["atomic-fadd-rtn-insts"] = true;
-  [[fallthrough]];
-case GK_GFX908:
-  Features["dot3-insts"] = true;
-  Features["dot4-insts"] = true;
-  Features["dot5-insts"] = true;
-  Features["dot6-insts"] = true;
-  Features["mai-insts"] = true;
-  [[fallthrough]];
-case GK_GFX906:
-  Features["dl-insts"] = true;
-  Features["dot1-insts"] = true;
-  Features["dot2-insts"] = true;
-   

[clang] baca3c1 - Move SIMD alignment calculation to LLVM Frontend

2023-02-10 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-02-10T04:11:54-06:00
New Revision: baca3c150733c89686287ba4927c351eec9695e2

URL: 
https://github.com/llvm/llvm-project/commit/baca3c150733c89686287ba4927c351eec9695e2
DIFF: 
https://github.com/llvm/llvm-project/commit/baca3c150733c89686287ba4927c351eec9695e2.diff

LOG: Move SIMD alignment calculation to LLVM Frontend

Currently the default SIMD alignment is defined by the Clang-specific
TargetInfo class. This class cannot be reused by LLVM Flang. That's why the
default SIMD alignment calculation has been moved to OMPIRBuilder, which is
common to Flang and Clang.

The previous attempt (https://reviews.llvm.org/D138496) was wrong because
the default alignment depended on which LLVM targets had been built.

If we wanted to calculate the default alignment for PPC and we hadn't specified
PPC LLVM target to build, then we would get 0 as the alignment because
OMPIRBuilder couldn't create PPCTargetMachine object and it returned 0 as
the default value.

If PPC LLVM target had been built earlier, then OMPIRBuilder could have created
PPCTargetMachine object and it would have returned 128.

Differential Revision: https://reviews.llvm.org/D141910

Reviewed By: jdoerfert

Added: 


Modified: 
clang/include/clang/Basic/TargetInfo.h
clang/lib/AST/ASTContext.cpp
clang/lib/AST/CMakeLists.txt
clang/lib/Basic/TargetInfo.cpp
clang/lib/Basic/Targets/PPC.h
clang/lib/Basic/Targets/WebAssembly.h
clang/lib/Basic/Targets/X86.cpp
lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/TargetInfo.h 
b/clang/include/clang/Basic/TargetInfo.h
index 7b6daa0c36724..1bdee3f416a90 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -225,7 +225,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   bool HasStrictFP;
 
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
-  unsigned short SimdDefaultAlign;
   std::string DataLayoutString;
   const char *UserLabelPrefix;
   const char *MCountName;
@@ -794,10 +793,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
 
   /// Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
-  /// Return default simd alignment for the given target. Generally, this
-  /// value is type-specific, but this alignment can be used for most of the
-  /// types for the given target.
-  unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; }
 
   unsigned getMaxOpenCLWorkGroupSize() const { return MaxOpenCLWorkGroupSize; }
 

diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index beb66543cac48..a188837d4144c 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -77,6 +77,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
@@ -2539,7 +2540,8 @@ unsigned ASTContext::getTypeUnadjustedAlign(const Type 
*T) const {
 }
 
 unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const {
-  unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign();
+  unsigned SimdAlign = llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
+  getTargetInfo().getTriple(), Target->getTargetOpts().FeatureMap);
   return SimdAlign;
 }
 

diff  --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index 3d8d8cc077615..0b737626a2201 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -132,4 +132,5 @@ add_clang_library(clangAST
   ClangAttrDocTable
   Opcodes
   omp_gen
+  intrinsics_gen
   )

diff  --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 70a15eb68f4ea..07af6c07031bd 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -119,7 +119,6 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
   MaxVectorAlign = 0;
   MaxTLSAlign = 0;
-  SimdDefaultAlign = 0;
   SizeType = UnsignedLong;
   PtrDiffType = SignedLong;
   IntMaxType = SignedLongLong;

diff  --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index ebf0c2d0506d8..79f5d6e8c720b 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -87,7 +87,6 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public 
TargetInfo {
   PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
   : TargetInfo(Triple) {
 SuitableAlign = 128;
-SimdDefaultAlign = 128;
 LongDoubleWidth = LongDoubleAlign = 128;
 LongDoubleFormat = &llvm::APFloat::PPCD

[clang] d93bdd8 - [OpenMP][MLIR][Flang][bbc][Driver] Add fopenmp-version and generate corresponding MLIR attribute

2023-05-19 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-05-19T06:22:40-05:00
New Revision: d93bdd8bd52a9a28bb9553aed72866df88c6c4f4

URL: 
https://github.com/llvm/llvm-project/commit/d93bdd8bd52a9a28bb9553aed72866df88c6c4f4
DIFF: 
https://github.com/llvm/llvm-project/commit/d93bdd8bd52a9a28bb9553aed72866df88c6c4f4.diff

LOG: [OpenMP][MLIR][Flang][bbc][Driver] Add fopenmp-version and generate 
corresponding MLIR attribute

This patch adds the -fopenmp-version flag to the Flang frontend and the bbc
tool. The flag is lowered to the MLIR OpenMP version attribute (omp.version).

Differential Revision: https://reviews.llvm.org/D150354

Reviewed By: kiranchandramohan

Added: 


Modified: 
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Flang.cpp
flang/include/flang/Frontend/LangOptions.def
flang/include/flang/Tools/CrossToolHelpers.h
flang/lib/Frontend/CompilerInvocation.cpp
flang/lib/Frontend/FrontendActions.cpp
flang/test/Driver/driver-help-hidden.f90
flang/test/Driver/driver-help.f90
flang/test/Driver/omp-driver-offload.f90
flang/test/Lower/OpenMP/rtl-flags.f90
flang/tools/bbc/bbc.cpp

Removed: 




diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 64543c78f434b..86bc7c8614e3d 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2650,8 +2650,8 @@ def fomit_frame_pointer : Flag<["-"], 
"fomit-frame-pointer">, Group,
 def fopenmp : Flag<["-"], "fopenmp">, Group, Flags<[CC1Option, 
NoArgumentUnused, FlangOption, FC1Option]>,
   HelpText<"Parse OpenMP pragmas and generate parallel code.">;
 def fno_openmp : Flag<["-"], "fno-openmp">, Group, 
Flags<[NoArgumentUnused]>;
-def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group, 
Flags<[CC1Option, NoArgumentUnused]>,
-  HelpText<"Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). 
Default value is 50.">;
+def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group, 
Flags<[CC1Option, NoArgumentUnused, FlangOption, FC1Option]>,
+  HelpText<"Set OpenMP version (e.g. 45 for OpenMP 4.5, 50 for OpenMP 5.0). 
Default value is 50 for Clang and 11 for Flang">;
 defm openmp_extensions: BoolFOption<"openmp-extensions",
   LangOpts<"OpenMPExtensions">, DefaultTrue,
   PosFlagsetAttr(
+  mlir::StringAttr::get(module.getContext(), llvm::Twine{"omp.version"}),
+  mlir::omp::VersionAttr::get(module.getContext(), version));
+}
+
 #endif // FORTRAN_TOOLS_CROSS_TOOL_HELPERS_H

diff  --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 84478f26b4b86..12649075ef5c4 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -720,9 +720,15 @@ static bool parseDialectArgs(CompilerInvocation &res, 
llvm::opt::ArgList &args,
 Fortran::common::LanguageFeature::OpenACC);
   }
   if (args.hasArg(clang::driver::options::OPT_fopenmp)) {
+// By default OpenMP is set to 1.1 version
+res.getLangOpts().OpenMPVersion = 11;
 res.getFrontendOpts().features.Enable(
 Fortran::common::LanguageFeature::OpenMP);
-
+if (int Version = getLastArgIntValue(
+args, clang::driver::options::OPT_fopenmp_version_EQ,
+res.getLangOpts().OpenMPVersion, diags)) {
+  res.getLangOpts().OpenMPVersion = Version;
+}
 if (args.hasArg(clang::driver::options::OPT_fopenmp_is_device)) {
   res.getLangOpts().OpenMPIsDevice = 1;
 

diff  --git a/flang/lib/Frontend/FrontendActions.cpp 
b/flang/lib/Frontend/FrontendActions.cpp
index f3e643ef99a1d..efe6c25b826e6 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -285,6 +285,8 @@ bool CodeGenAction::beginSourceFileAction() {
 ci.getInvocation().getLangOpts());
 setOffloadModuleInterfaceTargetAttribute(*mlirModule, tm->getTargetCPU(),
  tm->getTargetFeatureString());
+setOpenMPVersionAttribute(*mlirModule,
+  ci.getInvocation().getLangOpts().OpenMPVersion);
   }
 
   const llvm::DataLayout &dl = tm->createDataLayout();

diff  --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 68b106cbccbff..002b30cfe283b 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -55,6 +55,8 @@
 ! CHECK-NEXT: -fno-version-loops-for-stride
 ! CHECK-NEXT:Do not create unit-strided loops (default)
 ! CHECK-NEXT: -fopenacc  Enable OpenACC
+! CHECK-NEXT: -fopenmp-version=
+! CHECK-NEXT:Set OpenMP version (e.g. 45 for OpenMP 
4.5, 50 for OpenMP 5.0). Default value is 50 for Clang and 11 for Flang
 ! CHECK-NEXT: -fopenmp   Parse OpenMP pragmas and generate 
parallel code.
 ! CHECK-NEXT: -fpass

[clang] f900567 - [OpenMP][AMDGPU] Refactor setting uniform work group size attribute

2023-04-20 Thread Dominik Adamski via cfe-commits

Author: Dominik Adamski
Date: 2023-04-21T01:35:39-05:00
New Revision: f90056767828e499f4cdf568968e19cf8b6ab81b

URL: 
https://github.com/llvm/llvm-project/commit/f90056767828e499f4cdf568968e19cf8b6ab81b
DIFF: 
https://github.com/llvm/llvm-project/commit/f90056767828e499f4cdf568968e19cf8b6ab81b.diff

LOG: [OpenMP][AMDGPU] Refactor setting uniform work group size attribute

The uniform work group size attribute was set in a Clang-specific class.
That's why we cannot reuse this code in Flang.

If we move the setting of this attribute to OpenMPIRBuilder, then we can reuse
this code in both Flang and Clang. The createOffloadEntry function from
OpenMPIRBuilder is already used by Clang (via the
OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata function).

Differential Revision: https://reviews.llvm.org/D148525

Reviewed By: jdoerfert

Added: 


Modified: 
clang/lib/CodeGen/TargetInfo.cpp
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index e50e07a531b55..cc332aeee40d4 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9588,12 +9588,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
 
   const bool IsHIPKernel =
   M.getLangOpts().HIP && FD && FD->hasAttr();
-  const bool IsOpenMPkernel =
-  M.getLangOpts().OpenMPIsDevice &&
-  (F->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL);
 
   // TODO: This should be moved to language specific attributes instead.
-  if (IsHIPKernel || IsOpenMPkernel)
+  if (IsHIPKernel)
 F->addFnAttr("uniform-work-group-size", "true");
 
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 12c7c42ac5fe5..cf329e40f5a44 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4892,6 +4892,8 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, 
Constant *Addr,
 
   // Add a function attribute for the kernel.
   Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
+  if (Triple(M.getTargetTriple()).isAMDGCN())
+Fn->addFnAttr("uniform-work-group-size", "true");
 }
 
 // We only generate metadata for function that contain target regions.



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AMDGPU][OpenMP] Do not attach -fcuda-is-device flag for AMDGPU OpenMP (PR #96909)

2024-07-01 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/96909
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add offload related flags for AMDGPU (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/2] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPUs.

The fcuda-is-device flag is not currently used by Flang.
In the future it will be needed for the Flang equivalent of the function
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +--
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===--===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===--===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only 
\
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag 
addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" 
{{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the 
dr

[clang] [flang] [Flang-new][OpenMP] Add offload related flags for AMDGPU (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

Updated PR after: https://github.com/llvm/llvm-project/pull/96909/ .
Scope of changes:
- `-fcuda-is-device` is no longer attached by the OpenMP AMD GPU toolchain, so 
Flang-new does not need to accept this flag. This flag remains HIP/CUDA 
specific.
- The OpenMP AMD GPU toolchain only searches for the required bitcode files and 
attaches them to the `flang -fc1` invocation.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add offload related flags for AMDGPU (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/3] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPU.

fcuda-is-device flag is not used by Flang currently.
In the future it will be needed for Flang equivalent function:
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +--
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===--===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===--===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only 
\
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag 
addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" 
{{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the 
dr

[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-01 Thread Dominik Adamski via cfe-commits


@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);

DominikAdamski wrote:

> It's OK to make Flang "stricter" if we believe that's the right thing to do 
> ;-) (I think that generating useful error/warning messages like "don't mix 
> these flags - that's not supported") would be a good thing)

Shall I extend https://github.com/llvm/llvm-project/pull/94763 ? I don't use 
`-fcuda-is-device` anymore. Now, I'm only adding `-mlink-builtin-bitcode` flags 
to `flang-new -fc1` command. The `-mlink-builtin-bitcode` option was introduced 
by https://github.com/llvm/llvm-project/pull/94763



> > IMO can be reused between Flang and Clang
> 
> Are there any plans to extract that logic and share it somewhere?

Not yet (at least from my side). I can return to this topic if there is a need 
to support Clang options in Flang for AMD GPU.



> > I don't know if Nvidia also want to reuse their toolchain between Clang and 
> > Flang to fully support OpenMP offloading.
> 
> Who could be the right person to ask?

I don't know. Open-source LLVM Flang meetings can be a good place to ask this 
question.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "[AMDGPU][OpenMP] Do not attach -fcuda-is-device flag for AMDGPU OpenMP" (PR #97531)

2024-07-03 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/97531

Reverts llvm/llvm-project#96909

It breaks CI: 
https://gitlab.e4s.io/uo-public/llvm-openmp-offloading/-/jobs/283716 

>From f165b2279dc663cee7dc31d8213afb8c8fb48ab7 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Wed, 3 Jul 2024 09:06:47 +0200
Subject: [PATCH] =?UTF-8?q?Revert=20"[AMDGPU][OpenMP]=20Do=20not=20attach?=
 =?UTF-8?q?=20-fcuda-is-device=20flag=20for=20AMDGPU=20OpenM=E2=80=A6"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 8bb00cb160830ec8f6029c2aae79d3e46b04b99c.
---
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 2 ++
 clang/test/Driver/amdgpu-openmp-toolchain.c  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index b75d400e6ce91..1c0fb4babe3a5 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -47,6 +47,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
   assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
  "Only OpenMP offloading kinds are supported.");
 
+  CC1Args.push_back("-fcuda-is-device");
+
   if (DriverArgs.hasArg(options::OPT_nogpulib))
 return;
 
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c 
b/clang/test/Driver/amdgpu-openmp-toolchain.c
index a153c4afb0ce8..49af04acc4639 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -7,7 +7,7 @@
 
 // verify the tools invocations
 // CHECK: "-cc1" "-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
-// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-fcuda-is-device"{{.*}}"-target-cpu" "gfx906"
 // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
 // CHECK: clang-linker-wrapper{{.*}} "-o" "a.out"
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "[AMDGPU][OpenMP] Do not attach -fcuda-is-device flag for AMDGPU OpenMP" (PR #97531)

2024-07-03 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/97531
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-03 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/4] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPU.

fcuda-is-device flag is not used by Flang currently.
In the future it will be needed for Flang equivalent function:
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +--
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===--===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===--===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only 
\
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag 
addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" 
{{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the 
dr

[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-03 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

@tblah Thanks for your review.
Unfortunately, I had to restore adding the `-fcuda-is-device` option ( 
https://github.com/llvm/llvm-project/pull/97531 ) because of a regression related 
to Clang's handling of virtual functions in OpenMP target regions.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-03 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

@jhuber6 I'm working on that.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-09 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/88190

ROCm installation path is used for finding and automatically linking required 
bitcode libraries for OpenMP AMDGPU offload.

>From 8782af25c5946dc33342798c36c7d64569d16ab5 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 827d9d7c0c18e4..64ffb15939bb15 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1340,7 +1340,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot 

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-09 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 8782af25c5946dc33342798c36c7d64569d16ab5 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/2] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 827d9d7c0c18e4..64ffb15939bb15 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1340,7 +1340,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-09 Thread Dominik Adamski via cfe-commits


@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);

DominikAdamski wrote:

I need it to invoke `AMDGPUOpenMPToolChain::getDeviceLibs` to check if the 
specified ROCm path is correct.

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-09 Thread Dominik Adamski via cfe-commits


@@ -1340,7 +1340,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,

DominikAdamski wrote:

`ninja check-clang` does not report any errors. Is that enough?

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-09 Thread Dominik Adamski via cfe-commits


@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \

DominikAdamski wrote:

This test checks if flang reports an error if the ROCm path doesn't exist. The next 
test checks the positive scenario.

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-10 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/2] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-10 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/3] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-10 Thread Dominik Adamski via cfe-commits


@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);

DominikAdamski wrote:

Please look at the recent change:
https://github.com/llvm/llvm-project/pull/88190/commits/5b106231ab0f5432ba788035635a0d9d91b22ce0
Is this solution better?

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-10 Thread Dominik Adamski via cfe-commits


@@ -345,7 +345,13 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 
   // Check ROCm path if specified
   const ToolChain &TC = getToolChain();
-  TC.getDeviceLibs(Args);
+  std::string HIPVersion;
+  llvm::raw_string_ostream HIPInfo(HIPVersion);
+  TC.printVerboseInfo(HIPInfo);
+  llvm::StringRef HIPInfoStrRef(HIPInfo.str());
+  if (!HIPInfoStrRef.contains("Found HIP installation") &&

DominikAdamski wrote:

Note: I can extract the `Found HIP installation` string into a separate 
constant so that it can be used by both clang and flang.

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add option frtlib-add-rpath (PR #88280)

2024-04-10 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/88280

This option is used by clang and should also be visible in flang. It is already 
handled by the toolchains used by both clang and flang.

Reported issue: https://github.com/llvm/llvm-project/issues/82553

>From 1330d076904d98a0a594700cca1c3e4a6b15dd58 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Wed, 10 Apr 2024 09:42:09 -0500
Subject: [PATCH] [Flang] Add option frtlib-add-rpath

This option is used by clang and should also be visible in flang.
It is already handled by the toolchains used by both clang and flang.
---
 clang/include/clang/Driver/Options.td| 2 ++
 flang/test/Driver/driver-help-hidden.f90 | 2 ++
 flang/test/Driver/driver-help.f90| 2 ++
 3 files changed, 6 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..d26c48b3585de7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5472,10 +5472,12 @@ def rpath : Separate<["-"], "rpath">, 
Flags<[LinkerInput]>, Group,
 def rtlib_EQ : Joined<["-", "--"], "rtlib=">, Visibility<[ClangOption, 
CLOption]>,
   HelpText<"Compiler runtime library to use">;
 def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, 
Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Add -rpath with architecture-specific resource directory to the 
linker flags. "
   "When --hip-link is specified, also add -rpath with HIP runtime library 
directory to the linker flags">;
 def fno_rtlib_add_rpath: Flag<["-"], "fno-rtlib-add-rpath">,
   Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Do not add -rpath with architecture-specific resource directory to 
the linker flags. "
   "When --hip-link is specified, do not add -rpath with HIP runtime library 
directory to the linker flags">;
 def offload_add_rpath: Flag<["--"], "offload-add-rpath">,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..46f6fb76fe41a8 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -67,6 +67,7 @@
 ! CHECK-NEXT: -fno-ltoDisable LTO mode (default)
 ! CHECK-NEXT: -fno-ppc-native-vector-element-order
 ! CHECK-NEXT: Specifies PowerPC non-native vector 
element order
+! CHECK-NEXT: -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! CHECK-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! CHECK-NEXT: -fno-version-loops-for-stride
@@ -92,6 +93,7 @@
 ! CHECK-NEXT: Specifies PowerPC native vector element 
order (default)
 ! CHECK-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! CHECK-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! CHECK-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also add -rpath 
with HIP runtime library directory to the linker flags
 ! CHECK-NEXT: -frwpi  Generate read-write position independent 
code (ARM only)
 ! CHECK-NEXT: -fsave-optimization-record=
 ! CHECK-NEXT: Generate an optimization record file in 
a specific format
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..f0d42090835590 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -59,6 +59,7 @@
 ! HELP-NEXT: -fno-ltoDisable LTO mode (default)
 ! HELP-NEXT: -fno-ppc-native-vector-element-order
 ! HELP-NEXT: Specifies PowerPC non-native vector 
element order
+! HELP-NEXT:  -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! HELP-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! HELP-NEXT: -fno-version-loops-for-stride
@@ -80,6 +81,7 @@
 ! HELP-NEXT: Specifies PowerPC native vector element 
order (default)
 ! HELP-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! HELP-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! HELP-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also 

[clang] [flang] [Flang] Add option frtlib-add-rpath (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88280

>From 1330d076904d98a0a594700cca1c3e4a6b15dd58 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Wed, 10 Apr 2024 09:42:09 -0500
Subject: [PATCH 1/2] [Flang] Add option frtlib-add-rpath

This option is used by clang and should also be visible in flang.
It is already handled by the toolchains used by both clang and flang.
---
 clang/include/clang/Driver/Options.td| 2 ++
 flang/test/Driver/driver-help-hidden.f90 | 2 ++
 flang/test/Driver/driver-help.f90| 2 ++
 3 files changed, 6 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..d26c48b3585de7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5472,10 +5472,12 @@ def rpath : Separate<["-"], "rpath">, 
Flags<[LinkerInput]>, Group,
 def rtlib_EQ : Joined<["-", "--"], "rtlib=">, Visibility<[ClangOption, 
CLOption]>,
   HelpText<"Compiler runtime library to use">;
 def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, 
Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Add -rpath with architecture-specific resource directory to the 
linker flags. "
   "When --hip-link is specified, also add -rpath with HIP runtime library 
directory to the linker flags">;
 def fno_rtlib_add_rpath: Flag<["-"], "fno-rtlib-add-rpath">,
   Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Do not add -rpath with architecture-specific resource directory to 
the linker flags. "
   "When --hip-link is specified, do not add -rpath with HIP runtime library 
directory to the linker flags">;
 def offload_add_rpath: Flag<["--"], "offload-add-rpath">,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..46f6fb76fe41a8 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -67,6 +67,7 @@
 ! CHECK-NEXT: -fno-ltoDisable LTO mode (default)
 ! CHECK-NEXT: -fno-ppc-native-vector-element-order
 ! CHECK-NEXT: Specifies PowerPC non-native vector 
element order
+! CHECK-NEXT: -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! CHECK-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! CHECK-NEXT: -fno-version-loops-for-stride
@@ -92,6 +93,7 @@
 ! CHECK-NEXT: Specifies PowerPC native vector element 
order (default)
 ! CHECK-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! CHECK-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! CHECK-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also add -rpath 
with HIP runtime library directory to the linker flags
 ! CHECK-NEXT: -frwpi  Generate read-write position independent 
code (ARM only)
 ! CHECK-NEXT: -fsave-optimization-record=
 ! CHECK-NEXT: Generate an optimization record file in 
a specific format
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..f0d42090835590 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -59,6 +59,7 @@
 ! HELP-NEXT: -fno-ltoDisable LTO mode (default)
 ! HELP-NEXT: -fno-ppc-native-vector-element-order
 ! HELP-NEXT: Specifies PowerPC non-native vector 
element order
+! HELP-NEXT:  -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! HELP-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! HELP-NEXT: -fno-version-loops-for-stride
@@ -80,6 +81,7 @@
 ! HELP-NEXT: Specifies PowerPC native vector element 
order (default)
 ! HELP-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! HELP-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! HELP-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also add -rpath 
with HIP runtime library directory to the linker flags
 ! HELP-NEXT: -frwpi  Generate read-write position independent 
code (ARM only)
 ! HELP-NEXT: -fsave-optimization-reco

[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/88280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/88280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88280

>From 1330d076904d98a0a594700cca1c3e4a6b15dd58 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Wed, 10 Apr 2024 09:42:09 -0500
Subject: [PATCH 1/3] [Flang] Add option frtlib-add-rpath

This option is used by clang and should also be visible in flang.
It is already handled by the toolchains used by both clang and flang.
---
 clang/include/clang/Driver/Options.td| 2 ++
 flang/test/Driver/driver-help-hidden.f90 | 2 ++
 flang/test/Driver/driver-help.f90| 2 ++
 3 files changed, 6 insertions(+)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..d26c48b3585de7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5472,10 +5472,12 @@ def rpath : Separate<["-"], "rpath">, 
Flags<[LinkerInput]>, Group,
 def rtlib_EQ : Joined<["-", "--"], "rtlib=">, Visibility<[ClangOption, 
CLOption]>,
   HelpText<"Compiler runtime library to use">;
 def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, 
Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Add -rpath with architecture-specific resource directory to the 
linker flags. "
   "When --hip-link is specified, also add -rpath with HIP runtime library 
directory to the linker flags">;
 def fno_rtlib_add_rpath: Flag<["-"], "fno-rtlib-add-rpath">,
   Flags<[NoArgumentUnused]>,
+  Visibility<[ClangOption, FlangOption]>,
   HelpText<"Do not add -rpath with architecture-specific resource directory to 
the linker flags. "
   "When --hip-link is specified, do not add -rpath with HIP runtime library 
directory to the linker flags">;
 def offload_add_rpath: Flag<["--"], "offload-add-rpath">,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..46f6fb76fe41a8 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -67,6 +67,7 @@
 ! CHECK-NEXT: -fno-ltoDisable LTO mode (default)
 ! CHECK-NEXT: -fno-ppc-native-vector-element-order
 ! CHECK-NEXT: Specifies PowerPC non-native vector 
element order
+! CHECK-NEXT: -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! CHECK-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! CHECK-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! CHECK-NEXT: -fno-version-loops-for-stride
@@ -92,6 +93,7 @@
 ! CHECK-NEXT: Specifies PowerPC native vector element 
order (default)
 ! CHECK-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! CHECK-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! CHECK-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also add -rpath 
with HIP runtime library directory to the linker flags
 ! CHECK-NEXT: -frwpi  Generate read-write position independent 
code (ARM only)
 ! CHECK-NEXT: -fsave-optimization-record=
 ! CHECK-NEXT: Generate an optimization record file in 
a specific format
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..f0d42090835590 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -59,6 +59,7 @@
 ! HELP-NEXT: -fno-ltoDisable LTO mode (default)
 ! HELP-NEXT: -fno-ppc-native-vector-element-order
 ! HELP-NEXT: Specifies PowerPC non-native vector 
element order
+! HELP-NEXT:  -fno-rtlib-add-rpath Do not add -rpath with 
architecture-specific resource directory to the linker flags. When --hip-link 
is specified, do not add -rpath with HIP runtime library directory to the 
linker flags
 ! HELP-NEXT: -fno-signed-zeros   Allow optimizations that ignore the sign 
of floating point zeros
 ! HELP-NEXT: -fno-stack-arrays   Allocate array temporaries on the heap 
(default)
 ! HELP-NEXT: -fno-version-loops-for-stride
@@ -80,6 +81,7 @@
 ! HELP-NEXT: Specifies PowerPC native vector element 
order (default)
 ! HELP-NEXT: -freciprocal-math   Allow division operations to be 
reassociated
 ! HELP-NEXT: -fropi  Generate read-only position independent 
code (ARM only)
+! HELP-NEXT: -frtlib-add-rpath Add -rpath with architecture-specific resource 
directory to the linker flags. When --hip-link is specified, also add -rpath 
with HIP runtime library directory to the linker flags
 ! HELP-NEXT: -frwpi  Generate read-write position independent 
code (ARM only)
 ! HELP-NEXT: -fsave-optimization-reco

[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits


@@ -0,0 +1,32 @@
+// REQUIRES: x86-registered-target

DominikAdamski wrote:

Done

https://github.com/llvm/llvm-project/pull/88280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-11 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

> clang already tests this pretty well, but I'd still like to have at least one 
> check to make sure that flang is actually putting -rpath in the linker.

Done. I added a test.

https://github.com/llvm/llvm-project/pull/88280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-11 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/3] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-11 Thread Dominik Adamski via cfe-commits


@@ -345,7 +345,13 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 
   // Check ROCm path if specified
   const ToolChain &TC = getToolChain();
-  TC.getDeviceLibs(Args);
+  std::string HIPVersion;
+  llvm::raw_string_ostream HIPInfo(HIPVersion);
+  TC.printVerboseInfo(HIPInfo);
+  llvm::StringRef HIPInfoStrRef(HIPInfo.str());
+  if (!HIPInfoStrRef.contains("Found HIP installation") &&

DominikAdamski wrote:

Clang reports an error if `--rocm-path` points to an invalid directory. I need 
to do a similar check for Flang.

The ROCm toolchain checks `--rocm-path` within the function 
`RocmInstallationDetector::checkCommonBitcodeLibs`. This function is called 
from either `ROCMToolChain::addClangTargetOptions` or 
`ROCMToolChain::getCommonDeviceLibNames`. 
`ROCMToolChain::getCommonDeviceLibNames` is in turn called by 
`AMDGPUOpenMPToolChain::getDeviceLibs`. I decided not to call 
`ROCMToolChain::addClangTargetOptions` because Flang does not support all Clang 
options. That's why I initially decided to call 
`AMDGPUOpenMPToolChain::getDeviceLibs` to check the ROCm path. The second 
(current) approach is a workaround that emits an error if `--rocm-path` is 
specified incorrectly.

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang] Add options frtlib-add-rpath and resource-dir (PR #88280)

2024-04-12 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/88280
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits


@@ -345,7 +345,13 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 
   // Check ROCm path if specified
   const ToolChain &TC = getToolChain();
-  TC.getDeviceLibs(Args);
+  std::string HIPVersion;
+  llvm::raw_string_ostream HIPInfo(HIPVersion);
+  TC.printVerboseInfo(HIPInfo);
+  llvm::StringRef HIPInfoStrRef(HIPInfo.str());
+  if (!HIPInfoStrRef.contains("Found HIP installation") &&

DominikAdamski wrote:

test command:
`clang -fopenmp --offload-arch=gfx90a test.c`

Function call:
``` 
clang/lib/Driver/ToolChains/Clang.cpp : void Clang::ConstructJob
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
  |
  V
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp: void 
AMDGPUOpenMPToolChain::addClangTargetOptions
   for (auto BCFile : getDeviceLibs(DriverArgs)) {
  |
  V
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp: 
AMDGPUOpenMPToolChain::getDeviceLibs (
  |
  V
   if (!RocmInstallation->hasDeviceLibrary()) {
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
return {};
  }
  ``` 

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/4] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/4] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/88190

>From 44def17f36e8e27eb4232681e5ae7eff5de6d90f Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Tue, 9 Apr 2024 14:35:26 -0500
Subject: [PATCH 1/5] [Flang][AMDGPU] Add rocm-path flag

ROCm installation path is used for finding and automatically
linking required bitcode libraries.
---
 clang/include/clang/Driver/Options.td|  3 ++-
 clang/lib/Driver/ToolChains/Flang.cpp|  4 
 flang/test/Driver/driver-help-hidden.f90 |  1 +
 flang/test/Driver/driver-help.f90|  1 +
 flang/test/Driver/omp-driver-offload.f90 | 21 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index f745e573eb2686..651aa10150c06e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1341,7 +1341,8 @@ def hip_link : Flag<["--"], "hip-link">, 
Group,
   HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_hip_rt: Flag<["-"], "no-hip-rt">, Group,
   HelpText<"Do not link against HIP runtime libraries">;
-def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group,
+def rocm_path_EQ : Joined<["--"], "rocm-path=">,
+  Visibility<[FlangOption]>, Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 2c83f70eb7887e..75e4ead81e43ed 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -342,6 +342,10 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  // Check ROCm path if specified
+  const ToolChain &TC = getToolChain();
+  TC.getDeviceLibs(Args);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/driver-help-hidden.f90 
b/flang/test/Driver/driver-help-hidden.f90
index 48f48f5384fdc5..10b15fb454b9aa 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -144,6 +144,7 @@
 ! CHECK-NEXT: -print-target-triplePrint the normalized target triple
 ! CHECK-NEXT: -pthreadSupport POSIX threads in generated code
 ! CHECK-NEXT: -P  Disable linemarker output in -E mode
+! CHECK-NEXT: --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! CHECK-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! CHECK-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/driver-help.f90 
b/flang/test/Driver/driver-help.f90
index 38f74395a678ab..ed5af2a68eb044 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -132,6 +132,7 @@
 ! HELP-NEXT: -print-target-triplePrint the normalized target triple
 ! HELP-NEXT: -pthreadSupport POSIX threads in generated code
 ! HELP-NEXT: -P  Disable linemarker output in -E mode
+! HELP-NEXT:  --rocm-path= ROCm installation path, used for finding and 
automatically linking required bitcode libraries.
 ! HELP-NEXT: -Rpass-analysis= Report transformation analysis from 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass-missed=   Report missed transformations by 
optimization passes whose name matches the given POSIX regular expression
 ! HELP-NEXT: -Rpass=  Report transformations performed by 
optimization passes whose name matches the given POSIX regular expression
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 7e9a73627cd757..836dcfc85eb9de 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -201,3 +201,24 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+
+! RUN:   not %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%t/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=ROCM-PATH-NOT-FOUND %s
+! ROCM-PATH-NOT-FOUND: error: cannot find ROCm device library;
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/tes

[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits


@@ -201,3 +201,16 @@
 ! RUN:  -nogpulibc %s 2>&1 \
 ! RUN:   | FileCheck --check-prefix=NO-LIBC-GPU-AMDGPU %s
 ! NO-LIBC-GPU-AMDGPU-NOT: "-lcgpu-amdgpu"
+
+! RUN:   rm -rf %t/Inputs
+! RUN:   mkdir -p %t/Inputs
+! RUN:   cp -r %S/../../../clang/test/Driver/Inputs/rocm %t/Inputs

DominikAdamski wrote:

Done

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits


@@ -345,7 +345,13 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 
   // Check ROCm path if specified
   const ToolChain &TC = getToolChain();
-  TC.getDeviceLibs(Args);
+  std::string HIPVersion;
+  llvm::raw_string_ostream HIPInfo(HIPVersion);
+  TC.printVerboseInfo(HIPInfo);
+  llvm::StringRef HIPInfoStrRef(HIPInfo.str());
+  if (!HIPInfoStrRef.contains("Found HIP installation") &&

DominikAdamski wrote:

No, we do not use them in Flang

https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang][AMDGPU] Add rocm-path flag (PR #88190)

2024-04-12 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/88190
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-26 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/5] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPU.

fcuda-is-device flag is not used by Flang currently.
In the future it will be needed for Flang equivalent function:
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +--
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===--===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===--===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only 
\
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag 
addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" 
{{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the 
dr

[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-26 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

> > > Who could be the right person to ask?
> > 
> > I don't know. Open-source LLVM Flang meetings can be good place to ask this 
> > question.
>
> Did you ask? What feedback did you get?

@banach-space I asked the question on the Flang Slack, mentioned the issue at 
the latest Flang technical meeting, and described a potential solution here: 
https://discourse.llvm.org/t/offloading-on-nvptx64-target-with-flang-new-leads-to-undefined-reference-s/80237
. I got no feedback.

Can I merge this PR? The issue with `-fcuda-is-device` is resolved. If you wish, 
I can extend the driver checks for `-mlink-builtin-bitcode` in a separate PR.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-29 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

@jhuber6 You are right. Flang-new for AMD GPU requires `-mlink-builtin-bitcode` 
for math functions.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-29 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski updated 
https://github.com/llvm/llvm-project/pull/96742

>From 5b487aac3c8414b6f37f6888f361ca7488094048 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH 1/5] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPU.

fcuda-is-device flag is not used by Flang currently.
In the future it will be needed for Flang equivalent function:
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td |  4 +-
 clang/lib/Driver/ToolChains/Flang.cpp |  3 ++
 flang/test/Driver/omp-driver-offload.f90  | 58 +--
 flang/test/Driver/target-cpu-features.f90 |  4 +-
 flang/test/Driver/target-gpu-features.f90 |  2 +-
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===--===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===--===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..da81a6ee3ba8f 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -14,12 +14,12 @@
 ! Test regular -fopenmp with offload, and invocation filtering options
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-host-device \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST-AND-DEVICE
 
 ! OFFLOAD-HOST-AND-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -29,7 +29,7 @@
 
 ! RUN: %flang -S -### %s -o %t 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 --offload-host-only 
\
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-HOST
 
 ! OFFLOAD-HOST: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -39,7 +39,7 @@
 
 ! RUN: %flang -S -### %s 2>&1 \
 ! RUN: -fopenmp --offload-arch=gfx90a --offload-arch=sm_70 
--offload-device-only \
-! RUN: --target=aarch64-unknown-linux-gnu \
+! RUN: --target=aarch64-unknown-linux-gnu -nogpulib\
 ! RUN:   | FileCheck %s --check-prefix=OFFLOAD-DEVICE
 
 ! OFFLOAD-DEVICE: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
@@ -48,13 +48,13 @@
 ! OFFLOAD-DEVICE-NOT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"aarch64-unknown-linux-gnu"
 
 ! Test regular -fopenmp with offload for basic fopenmp-is-target-device flag 
addition and correct fopenmp 
-! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
+! RUN: %flang -### -fopenmp --offload-arch=gfx90a 
-fopenmp-targets=amdgcn-amd-amdhsa -nogpulib %s 2>&1 | FileCheck 
--check-prefixes=CHECK-OPENMP-IS-TARGET-DEVICE %s
 ! CHECK-OPENMP-IS-TARGET-DEVICE: "{{[^"]*}}flang-new" "-fc1" {{.*}} "-fopenmp" 
{{.*}} "-fopenmp-is-target-device" {{.*}}.f90"
 
 ! Testing appropriate flags are gnerated and appropriately assigned by the 
dr

[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-29 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-29 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][AMDGPU] Do not attach -fcuda-is-device (PR #99002)

2024-07-16 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/99002

The `-fcuda-is-device` flag is not used for OpenMP offloading for AMD GPUs, and 
it does not need to be added as a clang cc1 option for OpenMP code.

This PR has the same functionality as 
https://github.com/llvm/llvm-project/pull/96909 but it does not introduce a 
regression in virtual function support.

>From 3ffb93bef74694d71faf94fd6c30a149a028696d Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Wed, 3 Jul 2024 09:08:10 -0500
Subject: [PATCH] [OpenMP][AMDGPU] Do not attach -fcuda-is-device

-fcuda-is-device flag is not used for OpenMP offloading for AMD GPUs
and it does not need to be added as clang cc1 option for OpenMP code.
---
 clang/lib/CodeGen/CodeGenModule.h| 2 +-
 clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 2 --
 clang/test/Driver/amdgpu-openmp-toolchain.c  | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index caa3786c033b5..657e681730c3a 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1010,7 +1010,7 @@ class CodeGenModule : public CodeGenTypeCache {
   bool shouldEmitRTTI(bool ForEH = false) {
 return (ForEH || getLangOpts().RTTI) && !getLangOpts().CUDAIsDevice &&
!(getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
- getTriple().isNVPTX());
+ (getTriple().isNVPTX() || getTriple().isAMDGPU()));
   }
 
   /// Get the address of the RTTI descriptor for the given type.
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp 
b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 1c0fb4babe3a5..b75d400e6ce91 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -47,8 +47,6 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
   assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
  "Only OpenMP offloading kinds are supported.");
 
-  CC1Args.push_back("-fcuda-is-device");
-
   if (DriverArgs.hasArg(options::OPT_nogpulib))
 return;
 
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c 
b/clang/test/Driver/amdgpu-openmp-toolchain.c
index 49af04acc4639..a153c4afb0ce8 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -7,7 +7,7 @@
 
 // verify the tools invocations
 // CHECK: "-cc1" "-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
-// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-fcuda-is-device"{{.*}}"-target-cpu" "gfx906"
+// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" 
"x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"
 // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
 // CHECK: clang-linker-wrapper{{.*}} "-o" "a.out"
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP][AMDGPU] Do not attach -fcuda-is-device (PR #99002)

2024-07-18 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski closed 
https://github.com/llvm/llvm-project/pull/99002
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-19 Thread Dominik Adamski via cfe-commits


@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);

DominikAdamski wrote:

I asked a question on the flang-compiler Slack (openmp/openacc channel). If I get no 
response, I will raise the question on the Flang technical community call on Monday.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-19 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski edited 
https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-19 Thread Dominik Adamski via cfe-commits

DominikAdamski wrote:

> > Would it be possible for you to investigate that? It really shouldn't be 
> > required if we can't help it.
> 
> +1

Fixed in PR https://github.com/llvm/llvm-project/pull/99002

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add bitcode files for AMD GPU OpenMP (PR #96742)

2024-07-19 Thread Dominik Adamski via cfe-commits


@@ -8024,7 +8024,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {

DominikAdamski wrote:

PR https://github.com/llvm/llvm-project/pull/99002 stops adding the 
`-fcuda-is-device` option for OpenMP AMDGPU in clang. As a consequence, we don't 
need to add the `-fcuda-is-device` flag for Flang.

https://github.com/llvm/llvm-project/pull/96742
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [flang] [Flang-new][OpenMP] Add offload related flags for AMDGPU (PR #96742)

2024-06-26 Thread Dominik Adamski via cfe-commits

https://github.com/DominikAdamski created 
https://github.com/llvm/llvm-project/pull/96742

Flang-new needs to add `-mlink-builtin-bitcode` objects to properly support 
offload code generation for AMD GPUs.

The `-fcuda-is-device` flag is not currently used by Flang. In the future it will be 
needed for the Flang equivalent of the function 
`AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace`.

>From 80d46755e741cb9daa743574a3a4bfb580e0ad06 Mon Sep 17 00:00:00 2001
From: Dominik Adamski 
Date: Fri, 21 Jun 2024 18:03:53 +0200
Subject: [PATCH] [Flang-new][OpenMP] Add offload related flags for AMDGPU

Flang-new needs to add -mlink-builtin-bitcode objects
to properly support offload code generation for AMD GPUs.

The -fcuda-is-device flag is not currently used by Flang.
In the future it will be needed for the Flang equivalent of the function
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace.
---
 clang/include/clang/Driver/Options.td| 4 ++--
 clang/lib/Driver/ToolChains/Flang.cpp| 3 +++
 flang/test/Driver/omp-driver-offload.f90 | 8 
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index dd55838dcf384..612d5793232ce 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -8016,7 +8016,7 @@ def source_date_epoch : Separate<["-"], 
"source-date-epoch">,
 // CUDA Options
 
//===----------------------------------------------------------------------===//
 
-let Visibility = [CC1Option] in {
+let Visibility = [CC1Option, FC1Option] in {
 
 def fcuda_is_device : Flag<["-"], "fcuda-is-device">,
   HelpText<"Generate code for CUDA device">,
@@ -8031,7 +8031,7 @@ def fno_cuda_host_device_constexpr : Flag<["-"], 
"fno-cuda-host-device-constexpr
   HelpText<"Don't treat unattributed constexpr functions as __host__ 
__device__.">,
   MarshallingInfoNegativeFlag<LangOpts<"CUDAHostDeviceConstexpr">>;
 
-} // let Visibility = [CC1Option]
+} // let Visibility = [CC1Option, FC1Option]
 
 
//===----------------------------------------------------------------------===//
 // OpenMP Options
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp 
b/clang/lib/Driver/ToolChains/Flang.cpp
index 42b45dba2bd31..2679f284c5016 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -333,6 +333,9 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
 StringRef Val = A->getValue();
 CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
   }
+
+  const ToolChain &TC = getToolChain();
+  TC.addClangTargetOptions(Args, CmdArgs, Action::OffloadKind::OFK_OpenMP);
 }
 
 void Flang::addTargetOptions(const ArgList &Args,
diff --git a/flang/test/Driver/omp-driver-offload.f90 
b/flang/test/Driver/omp-driver-offload.f90
index 6fb4f4ca1..b8afbe65961dc 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -227,3 +227,11 @@
 ! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
 ! FORCE-USM-OFFLOAD-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" 
"amdgcn-amd-amdhsa"
 ! FORCE-USM-OFFLOAD-SAME: "-fopenmp" "-fopenmp-force-usm"
+
+! RUN:   %flang -### -v --target=x86_64-unknown-linux-gnu -fopenmp  \
+! RUN:  --offload-arch=gfx900 \
+! RUN:  --rocm-path=%S/Inputs/rocm %s 2>&1 \
+! RUN:   | FileCheck --check-prefix=MLINK-BUILTIN-BITCODE  %s
+! MLINK-BUILTIN-BITCODE:  "{{[^"]*}}flang-new" "-fc1" "-triple" 
"amdgcn-amd-amdhsa"
+! MLINK-BUILTIN-BITCODE-SAME: "-fcuda-is-device"
+! MLINK-BUILTIN-BITCODE-SAME: "-mlink-builtin-bitcode" 
{{.*Inputs.*rocm.*amdgcn.*bitcode.*}}oclc_isa_version_900.bc

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   >