[llvm-branch-commits] [lld] 1d5277c - Revert "[llvm-objdump] Print ... even if a data mapping symbol is active"

2024-09-25 Thread via llvm-branch-commits

Author: Justin Bogner
Date: 2024-09-25T19:14:39-07:00
New Revision: 1d5277c271bc01fbc5af90292f8dd9af9ded623a

URL: 
https://github.com/llvm/llvm-project/commit/1d5277c271bc01fbc5af90292f8dd9af9ded623a
DIFF: 
https://github.com/llvm/llvm-project/commit/1d5277c271bc01fbc5af90292f8dd9af9ded623a.diff

LOG: Revert "[llvm-objdump] Print ... even if a data mapping symbol is active"

This reverts commit abe0dd195a3b2630afdc5c1c233eb2a068b2d72f.

Added: 


Modified: 
lld/test/ELF/aarch64-undefined-weak.s
llvm/test/MC/ARM/ltorg-range.s
llvm/tools/llvm-objdump/llvm-objdump.cpp

Removed: 
llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test
llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test



diff  --git a/lld/test/ELF/aarch64-undefined-weak.s 
b/lld/test/ELF/aarch64-undefined-weak.s
index 015f9c9a043e54..f4628453ec3fea 100644
--- a/lld/test/ELF/aarch64-undefined-weak.s
+++ b/lld/test/ELF/aarch64-undefined-weak.s
@@ -1,7 +1,7 @@
 // REQUIRES: aarch64
 // RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o
 // RUN: ld.lld --image-base=0x1000 %t.o -o %t
-// RUN: llvm-objdump -d -z --no-show-raw-insn %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 
 // Check that the ARM 64-bit ABI rules for undefined weak symbols are applied.
 // Branch instructions are resolved to the next instruction. Undefined

diff  --git a/llvm/test/MC/ARM/ltorg-range.s b/llvm/test/MC/ARM/ltorg-range.s
index 88b9bb3cb5be80..5c27d4cd0df26f 100644
--- a/llvm/test/MC/ARM/ltorg-range.s
+++ b/llvm/test/MC/ARM/ltorg-range.s
@@ -1,5 +1,5 @@
 @ RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -filetype obj -o - %s \
-@ RUN:   | llvm-objdump -d -z - | FileCheck %s
+@ RUN:   | llvm-objdump -d - | FileCheck %s
 
 ldr r0, =0x01020304
 @ CHECK: ldr

diff  --git a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test 
b/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test
deleted file mode 100644
index a56d056f8a2256..00
--- a/llvm/test/tools/llvm-objdump/ELF/AArch64/zeroes.test
+++ /dev/null
@@ -1,66 +0,0 @@
-## Test zero dumping when a data mapping symbol is active.
-# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
-# RUN: llvm-objdump -t -d %t | FileCheck %s
-
-# CHECK:  SYMBOL TABLE:
-# CHECK-NEXT:  l   .text   $d
-# CHECK-NEXT: 000c l   .text   $x
-# CHECK-NEXT: 0010 l   .text   $d
-
-# CHECK:   <_start>:
-# CHECK-NEXT: ...
-# CHECK-NEXT:8: 01 00 00 00   .word   0x0001
-# CHECK-NEXT:c: d503201f  nop
-# CHECK-NEXT: ...
-# CHECK-NEXT:   18: d503201f  nop
-# CHECK-NEXT: ...
-# CHECK-NEXT:   2c: d503201f  nop
-# CHECK-NEXT: ...
-# CHECK-NEXT:   48: d503201f  nop
-
-# RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=ZERO
-
-# ZERO:   <_start>:
-# ZERO-NEXT:0: 00 00 00 00   .word   0x
-# ZERO-NEXT:4: 00 00 00 00   .word   0x
-# ZERO-NEXT:8: 01 00 00 00   .word   0x0001
-# ZERO-NEXT:c: d503201f  nop
-# ZERO-NEXT:   10: 00 00 00 00   .word   0x
-# ZERO-NEXT:   14: 00 00 00 00   .word   0x
-# ZERO-NEXT:   18: d503201f  nop
-
-## Check we do not skip zeroes blocks if have relocations pointed to these 
places.
-# RUN: llvm-objdump -d -r %t | FileCheck %s --check-prefix=RELOC
-
-# RELOC:   <_start>:
-# RELOC-NEXT: ...
-# RELOC-NEXT:8: 01 00 00 00   .word   0x0001
-# RELOC-NEXT:c: d503201f  nop
-# RELOC-NEXT: ...
-# RELOC-NEXT:   18: d503201f  nop
-# RELOC-NEXT:   1c: 00 00 00 00   .word   0x
-# RELOC-NEXT: 001c:  R_AARCH64_ABS64  x1
-# RELOC-NEXT: ...
-# RELOC-NEXT:   2c: d503201f  nop
-# RELOC-NEXT: ...
-# RELOC-NEXT:   38: 00 00 00 00   .word   0x
-# RELOC-NEXT: 0038:  R_AARCH64_ABS64  x2
-# RELOC-NEXT: ...
-# RELOC-NEXT:   48: d503201f  nop
-
-.globl _start
-_start:
-  .space 8
-  .long 1
-  nop
-  .space 8
-  nop
-
-  .quad x1
-  .space 8
-  nop
-
-  .space 8
-  .quad x2
-  .space 8
-  nop

diff  --git a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test 
b/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test
deleted file mode 100644
index 8601343bd146e9..00
--- a/llvm/test/tools/llvm-objdump/ELF/ARM/zeroes.test
+++ /dev/null
@@ -1,47 +0,0 @@
-## Test zero dumping when a data mapping symbol is active.
-# RUN: llvm-mc -filetype=obj -triple=armv7 %s -o %t
-# RUN: llvm-objdump -t -d %t | FileCheck %s
-
-# CHECK:  SYMBOL TABLE:
-# CHECK-NEXT:  l   .text   $d
-# CHECK-NEXT: 000c l 

[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Akshat Oke via llvm-branch-commits


@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |

Akshat-Oke wrote:

What exactly should be serialized for allocated registers?
I am working on serializing virtual register flags (currently there is only 
one, WWM_REG).

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add address class for variables with a single register location (PR #110030)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-debuginfo

Author: William G Hatch (willghatch)


Changes

This is the final piece to enable register debugging for variables in registers 
that have single locations that last throughout their enclosing scope.

The next step after this for supporting register debugging for NVPTX is to 
support the .debug_loc section.

Stacked on top of: https://github.com/llvm/llvm-project/pull/109495

---

Patch is 357.48 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/110030.diff


4 Files Affected:

- (modified) llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp (+8) 
- (modified) llvm/test/DebugInfo/NVPTX/cu-range-hole.ll (+15-11) 
- (modified) llvm/test/DebugInfo/NVPTX/debug-addr-class.ll (+20-16) 
- (modified) llvm/test/DebugInfo/NVPTX/debug-info.ll (+1123-1120) 


```diff
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 0a1ff189bedbc4..a52699e4fe38e2 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -779,6 +779,14 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable 
&DV, bool Abstract) {
 void DwarfCompileUnit::applyConcreteDbgVariableAttributes(
 const Loc::Single &Single, const DbgVariable &DV, DIE &VariableDie) {
   const DbgValueLoc *DVal = &Single.getValueLoc();
+  if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB() &&
+  !Single.getExpr()) {
+
+// Lack of expression means it is a register.  Registers for PTX need to
+// be marked with DW_AT_address_class = 2.  See
+// 
https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+addUInt(VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, 2);
+  }
   if (!DVal->isVariadic()) {
 const DbgValueLocEntry *Entry = DVal->getLocEntries().begin();
 if (Entry->isLocation()) {
diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll 
b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
index 6acc1ba2512711..2d927b18d976d9 100644
--- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
@@ -120,6 +120,8 @@ entry:
 ; CHECK-NEXT: .b8 3// Abbreviation Code
 ; CHECK-NEXT: .b8 5// DW_TAG_formal_parameter
 ; CHECK-NEXT: .b8 0// DW_CHILDREN_no
+; CHECK-NEXT: .b8 51   // DW_AT_address_class
+; CHECK-NEXT: .b8 11   // DW_FORM_data1
 ; CHECK-NEXT: .b8 2// DW_AT_location
 ; CHECK-NEXT: .b8 10   // DW_FORM_block1
 ; CHECK-NEXT: .b8 3// DW_AT_name
@@ -147,12 +149,12 @@ entry:
 ; CHECK-NEXT: }
 ; CHECK-NEXT: .section .debug_info
 ; CHECK-NEXT: {
-; CHECK-NEXT: .b32 195 // Length of Unit
+; CHECK-NEXT: .b32 197 // Length of Unit
 ; CHECK-NEXT: .b8 2// DWARF version number
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b32 .debug_abbrev   // Offset Into Abbrev. 
Section
 ; CHECK-NEXT: .b8 8// Address Size (in bytes)
-; CHECK-NEXT: .b8 1// Abbrev [1] 0xb:0xbc 
DW_TAG_compile_unit
+; CHECK-NEXT: .b8 1// Abbrev [1] 0xb:0xbe 
DW_TAG_compile_unit
 ; CHECK-NEXT: .b8 99   // DW_AT_producer
 ; CHECK-NEXT: .b8 108
 ; CHECK-NEXT: .b8 97
@@ -225,7 +227,7 @@ entry:
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc
 ; CHECK-NEXT: .b64 $L__func_end2   // DW_AT_high_pc
-; CHECK-NEXT: .b8 2// Abbrev [2] 0x65:0x2d 
DW_TAG_subprogram
+; CHECK-NEXT: .b8 2// Abbrev [2] 0x65:0x2e 
DW_TAG_subprogram
 ; CHECK-NEXT: .b64 $L__func_begin0 // DW_AT_low_pc
 ; CHECK-NEXT: .b64 $L__func_end0   // DW_AT_high_pc
 ; CHECK-NEXT: .b8 1// DW_AT_frame_base
@@ -235,9 +237,10 @@ entry:
 ; CHECK-NEXT: .b8 1// DW_AT_decl_file
 ; CHECK-NEXT: .b8 1// DW_AT_decl_line
 ; CHECK-NEXT: .b8 1// DW_AT_prototyped
-; CHECK-NEXT: .b32 191 // DW_AT_type
+; CHECK-NEXT: .b32 193 // DW_AT_type
 ; CHECK-NEXT: .b8 1// DW_AT_external
-; CHECK-NEXT: .b8 3// Abbrev [3] 0x82:0xf 
DW_TAG_formal_parameter
+; CHECK-NEXT: .b8 3// Abbrev [3] 0x82:0x10 
DW_TAG_formal_parameter
+; CHECK-NEXT: .b8 2// DW_AT_address_class
 ; CHECK-NEXT: .b8 5// DW_AT_loca

[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-25 Thread Shilei Tian via llvm-branch-commits


@@ -12571,17 +12571,59 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   void initialize(Attributor &A) override {
 assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
"Associated value is not a pointer");
-if (getAssociatedType()->getPointerAddressSpace())
+// If the pointer already has non-generic address space, we assume it is 
the
+// correct one.
+if (getAssociatedType()->getPointerAddressSpace()) {
+  [[maybe_unused]] bool R =
+  takeAddressSpace(getAssociatedType()->getPointerAddressSpace());
+  assert(R && "the take should happen");
   indicateOptimisticFixpoint();
+  return;
+}
+// If the pointer is an addrspacecast, we assume the source address space 
is
+// the correct one.
+Value *V = &getAssociatedValue();
+if (auto *ASC = dyn_cast(V)) {
+  [[maybe_unused]] bool R = takeAddressSpace(ASC->getSrcAddressSpace());
+  assert(R && "the take should happen");
+  indicateOptimisticFixpoint();
+  return;
+}
+if (auto *C = dyn_cast(V)) {
+  if (C->getOpcode() == Instruction::AddrSpaceCast) {
+[[maybe_unused]] bool R = takeAddressSpace(
+C->getOperand(0)->getType()->getPointerAddressSpace());
+assert(R && "the take should happen");
+indicateOptimisticFixpoint();
+return;
+  }
+}
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
-int32_t OldAddressSpace = AssumedAddressSpace;
+uint32_t OldAddressSpace = AssumedAddressSpace;
 auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
 DepClassTy::REQUIRED);
 auto Pred = [&](Value &Obj) {
   if (isa(&Obj))
 return true;
+  // If an argument in generic address space has addrspace cast uses, and
+  // those casts are same, then we take the dst addrspace.
+  if (auto *Arg = dyn_cast(&Obj)) {

shiltian wrote:

If that's the case, the `AMDGPUPromoteKernelArgumentsPass` should not exist in 
the first place.
Both Flang and Clang (for OpenMP target offloading) still generate generic 
pointers.
HIP and OpenCL front ends generate AS1 pointers, which is convenient.

https://github.com/llvm/llvm-project/pull/108258
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add address class for variables with a single register location (PR #110030)

2024-09-25 Thread Walter Erquinigo via llvm-branch-commits

https://github.com/walter-erquinigo approved this pull request.

amazing

https://github.com/llvm/llvm-project/pull/110030
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)

2024-09-25 Thread Walter Erquinigo via llvm-branch-commits

https://github.com/walter-erquinigo approved this pull request.


https://github.com/llvm/llvm-project/pull/109495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] 153a49a - Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295)"

2024-09-25 Thread via llvm-branch-commits

Author: Matthias Springer
Date: 2024-09-25T09:17:16+02:00
New Revision: 153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9

URL: 
https://github.com/llvm/llvm-project/commit/153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9
DIFF: 
https://github.com/llvm/llvm-project/commit/153a49ab4875fb6c7bb69d9efc1e5ce35f2b61f9.diff

LOG: Revert "[MLIR] Make `OneShotModuleBufferize` use `OpInterface` (#107295)"

This reverts commit f586b1e3f42788025aa6f55be70c5e361cc8b529.

Added: 


Modified: 
mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h

mlir/include/mlir/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.h
mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp

mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp
mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp
mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
mlir/test/Dialect/LLVM/transform-e2e.mlir
mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
mlir/test/Dialect/Vector/transform-vector.mlir
mlir/test/Examples/transform/ChH/full.mlir

Removed: 




diff  --git 
a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h 
b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index d19687ec9afee1..aceb9d059b95f3 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -11,7 +11,6 @@
 
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/DenseMapInfoVariant.h"
 #include "llvm/ADT/SetVector.h"
@@ -261,9 +260,9 @@ struct BufferizationOptions {
   using AnalysisStateInitFn = std::function;
   /// Tensor -> MemRef type converter.
   /// Parameters: Value, memory space, func op, bufferization options
-  using FunctionArgTypeConverterFn = std::function;
+  using FunctionArgTypeConverterFn =
+  std::function;
   /// Tensor -> MemRef type converter.
   /// Parameters: Value, memory space, bufferization options
   using UnknownTypeConverterFn = std::function equivalentFuncArgs;
+  DenseMap equivalentFuncArgs;
 
   /// A mapping of FuncOp BBArg indices to aliasing ReturnOp OpOperand indices.
-  DenseMap aliasingReturnVals;
+  DenseMap aliasingReturnVals;
 
   /// A set of all read BlockArguments of FuncOps.
-  DenseMap readBbArgs;
+  DenseMap readBbArgs;
 
   /// A set of all written-to BlockArguments of FuncOps.
-  DenseMap writtenBbArgs;
+  DenseMap writtenBbArgs;
 
   /// Keep track of which FuncOps are fully analyzed or currently being
   /// analyzed.
-  DenseMap analyzedFuncOps;
+  DenseMap analyzedFuncOps;
 
   /// This function is called right before analyzing the given FuncOp. It
   /// initializes the data structures for the FuncOp in this state object.
-  void startFunctionAnalysis(FunctionOpInterface funcOp);
+  void startFunctionAnalysis(FuncOp funcOp);
 };
 
 void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);

diff  --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp 
b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 92f757111cbaf7..85604eef2f2830 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -18,7 +18,6 @@
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/IR/Value.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
-#include "mlir/Interfaces/FunctionInterfaces.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/Debug.h"
 
@@ -315,7 +314,7 @@ namespace {
 /// Default function arg type converter: Use a fully dynamic layout map.
 BaseMemRefType
 defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace,
-FunctionOpInterface funcOp,
+func::FuncOp funcOp,
 const BufferizationOptions &options) {
   return getMemRefTypeWithFullyDynamicLayout(type, memorySpace);
 }
@@ -362,7 +361,7 @@ BufferizationOptions::dynCastBufferizableOp(Value value) 
const {
 void BufferizationOptions::setFunctionBoundaryTypeConversion(
 LayoutMapOption layoutMapOption) {
   functionArgTypeConverterFn = [=](TensorType tensorType, Attribute 
memorySpace,
-   FunctionOpInterface funcOp,
+   func::FuncOp funcOp,
const BufferizationOptions &options) {
 if (layoutMapOption == LayoutMapOption::IdentityLayoutMap)
   return bufferization::getMemRefTypeWithStaticIdentityLayout(tensorType,

diff  --git 
a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp 
b/mlir/

[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:

@arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109920

Backport 90c14748638f1e10e31173b145fdbb5c4529c922

Requested by: @nikic

>From 55eaa21656d6a9d55732207c25cdb0825cf4be2e Mon Sep 17 00:00:00 2001
From: Timothy Pearson <162513562+tpearson-...@users.noreply.github.com>
Date: Wed, 25 Sep 2024 02:09:50 -0500
Subject: [PATCH] [SDAG] Honor signed arguments in floating point libcalls
 (#109134)

In ExpandFPLibCall, an assumption is made that all floating point
libcalls that take integer arguments use unsigned integers. In the case
of ldexp and frexp, this assumption is incorrect, leading to
miscompilation and subsequent target-dependent incorrect operation.

Indicate that ldexp and frexp utilize signed arguments in
ExpandFPLibCall.

Fixes #108904

Signed-off-by: Timothy Pearson 
(cherry picked from commit 90c14748638f1e10e31173b145fdbb5c4529c922)
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  3 +-
 llvm/test/CodeGen/PowerPC/ldexp-libcall.ll|  4 +-
 llvm/test/CodeGen/PowerPC/ldexp.ll| 36 ++
 .../PowerPC/negative-integer-fp-libcall.ll| 26 +++
 .../X86/fold-int-pow2-with-fmul-or-fdiv.ll| 69 ---
 5 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62f..4b25f553ffae91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
 Results.push_back(Tmp.first);
 Results.push_back(Tmp.second);
   } else {
-SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
 Results.push_back(Tmp);
   }
 }
diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll 
b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d9203651..e531516c37e87e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexp
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll 
b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30bd..ffc826cc86de59 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> 
%exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v3
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v31, v3
 ; CHECK-NEXT:vmr v30, v2
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:xscvdpspn v29, f1
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x 
i32> %exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:li r3, 12
-; CHECK-NEXT:xscvspdpn f1, v2
+; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v2
 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xscvspdpn f1, vs0
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:vmr v31, v3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v30, v2
-; CHECK-NEXT:vextuwrx r4, r3, v3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:no

[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: None (llvmbot)


Changes

Backport 90c14748638f1e10e31173b145fdbb5c4529c922

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/109920.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+2-1) 
- (modified) llvm/test/CodeGen/PowerPC/ldexp-libcall.ll (+2-2) 
- (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+21-15) 
- (added) llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll (+26) 
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+45-24) 


```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62f..4b25f553ffae91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
 Results.push_back(Tmp.first);
 Results.push_back(Tmp.second);
   } else {
-SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
 Results.push_back(Tmp);
   }
 }
diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll 
b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d9203651..e531516c37e87e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexp
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll 
b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30bd..ffc826cc86de59 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> 
%exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v3
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v31, v3
 ; CHECK-NEXT:vmr v30, v2
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:xscvdpspn v29, f1
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x 
i32> %exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:li r3, 12
-; CHECK-NEXT:xscvspdpn f1, v2
+; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v2
 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xscvspdpn f1, vs0
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:vmr v31, v3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v30, v2
-; CHECK-NEXT:vextuwrx r4, r3, v3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
-; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:li r3, 12
 ; CHECK-NEXT:xscpsgndp v29, f1, f1
-; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:xscvspdpn f1, v30
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxmrghd vs0, v29, vs1
+; CHECK-NEXT:xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:li r3, 0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
 ; CHECK-NEXT:xvcvdpsp v28, vs0
 ; CHECK-NEXT:xxsldwi vs0, v30, v30, 3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:xscvspdpn f1, vs0
 ; CHECK-NEXT:  

[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport 90c14748638f1e10e31173b145fdbb5c4529c922

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/109920.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+2-1) 
- (modified) llvm/test/CodeGen/PowerPC/ldexp-libcall.ll (+2-2) 
- (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+21-15) 
- (added) llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll (+26) 
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+45-24) 


```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62f..4b25f553ffae91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
 Results.push_back(Tmp.first);
 Results.push_back(Tmp.second);
   } else {
-SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
 Results.push_back(Tmp);
   }
 }
diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll 
b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d9203651..e531516c37e87e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexp
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll 
b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30bd..ffc826cc86de59 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> 
%exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v3
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v31, v3
 ; CHECK-NEXT:vmr v30, v2
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:xscvdpspn v29, f1
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x 
i32> %exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:li r3, 12
-; CHECK-NEXT:xscvspdpn f1, v2
+; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v2
 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xscvspdpn f1, vs0
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:vmr v31, v3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v30, v2
-; CHECK-NEXT:vextuwrx r4, r3, v3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
-; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:li r3, 12
 ; CHECK-NEXT:xscpsgndp v29, f1, f1
-; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:xscvspdpn f1, v30
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxmrghd vs0, v29, vs1
+; CHECK-NEXT:xxmrghd vs0, vs1, v29
 ; CHECK-NEXT:li r3, 0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
 ; CHECK-NEXT:xvcvdpsp v28, vs0
 ; CHECK-NEXT:xxsldwi vs0, v30, v30, 3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:xscvspdpn f1, vs0
 ; CHECK-NEXT:bl l

[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread via llvm-branch-commits

github-actions[bot] wrote:

⚠️ We detected that you are using a GitHub private e-mail address to contribute 
to the repo. Please turn off [Keep my email addresses 
private](https://github.com/settings/emails) setting in your account. See 
[LLVM 
Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it)
 for more information.

https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ADT] Add more useful methods to SmallSet API (PR #108601)

2024-09-25 Thread Victor Campos via llvm-branch-commits

https://github.com/vhscampos updated 
https://github.com/llvm/llvm-project/pull/108601

From 12b657a4761351d52fccb93ce52e64c3c1b1e91f Mon Sep 17 00:00:00 2001
From: Victor Campos 
Date: Fri, 9 Aug 2024 14:00:32 +0100
Subject: [PATCH 1/3] [ADT] Add more useful methods to SmallSet API

This patch adds useful methods to the SmallSet API:

 - Constructor that takes pair of iterators.
 - Constructor that takes a range.
 - Constructor that takes an initializer list.
 - Copy constructor.
 - Move constructor.
 - Copy assignment operator.
 - Move assignment operator.
---
 llvm/include/llvm/ADT/SmallSet.h| 17 
 llvm/unittests/ADT/SmallSetTest.cpp | 60 +
 2 files changed, 77 insertions(+)

diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 56259ea7cf9d0f..431fdee56c20e0 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/iterator.h"
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -147,6 +148,22 @@ class SmallSet {
   using const_iterator = SmallSetIterator;
 
   SmallSet() = default;
+  SmallSet(const SmallSet &) = default;
+  SmallSet(SmallSet &&) = default;
+
+  template  SmallSet(IterT Begin, IterT End) {
+this->insert(Begin, End);
+  }
+
+  template 
+  explicit SmallSet(const iterator_range &R) {
+this->insert(R.begin(), R.end());
+  }
+
+  SmallSet(std::initializer_list L) { this->insert(L.begin(), L.end()); }
+
+  SmallSet &operator=(const SmallSet &) = default;
+  SmallSet &operator=(SmallSet &&) = default;
 
   [[nodiscard]] bool empty() const { return Vector.empty() && Set.empty(); }
 
diff --git a/llvm/unittests/ADT/SmallSetTest.cpp 
b/llvm/unittests/ADT/SmallSetTest.cpp
index 0fb20b19df9254..8219bf6f4b4c55 100644
--- a/llvm/unittests/ADT/SmallSetTest.cpp
+++ b/llvm/unittests/ADT/SmallSetTest.cpp
@@ -17,6 +17,66 @@
 
 using namespace llvm;
 
+TEST(SmallSetTest, ConstructorIteratorPair) {
+  auto L = {1, 2, 3, 4, 5};
+  SmallSet S(std::begin(L), std::end(L));
+  for (int Value : L)
+EXPECT_TRUE(S.contains(Value));
+}
+
+TEST(SmallSet, ConstructorRange) {
+  auto L = {1, 2, 3, 4, 5};
+
+  SmallSet S(llvm::make_range(std::begin(L), std::end(L)));
+  for (int Value : L)
+EXPECT_TRUE(S.contains(Value));
+}
+
+TEST(SmallSet, ConstructorInitializerList) {
+  auto L = {1, 2, 3, 4, 5};
+  SmallSet S = {1, 2, 3, 4, 5};
+  for (int Value : L)
+EXPECT_TRUE(S.contains(Value));
+}
+
+TEST(SmallSet, CopyConstructor) {
+  SmallSet S = {1, 2, 3};
+  SmallSet T = S;
+
+  EXPECT_EQ(S, T);
+}
+
+TEST(SmallSet, MoveConstructor) {
+  auto L = {1, 2, 3};
+  SmallSet S = L;
+  SmallSet T = std::move(S);
+
+  EXPECT_TRUE(T.size() == L.size());
+  for (int Value : L) {
+EXPECT_TRUE(T.contains(Value));
+  }
+}
+
+TEST(SmallSet, CopyAssignment) {
+  SmallSet S = {1, 2, 3};
+  SmallSet T;
+  T = S;
+
+  EXPECT_EQ(S, T);
+}
+
+TEST(SmallSet, MoveAssignment) {
+  auto L = {1, 2, 3};
+  SmallSet S = L;
+  SmallSet T;
+  T = std::move(S);
+
+  EXPECT_TRUE(T.size() == L.size());
+  for (int Value : L) {
+EXPECT_TRUE(T.contains(Value));
+  }
+}
+
 TEST(SmallSetTest, Insert) {
 
   SmallSet s1;

From d122983eb4f1f66da2a4a6b5bcdb9c8171d18205 Mon Sep 17 00:00:00 2001
From: Victor Campos 
Date: Tue, 24 Sep 2024 17:43:42 +0100
Subject: [PATCH 2/3] fixup! [ADT] Add more useful methods to SmallSet API

---
 llvm/include/llvm/ADT/SmallSet.h|  4 ++--
 llvm/unittests/ADT/SmallSetTest.cpp | 34 +++--
 2 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 431fdee56c20e0..1b8ad542846630 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -152,12 +152,12 @@ class SmallSet {
   SmallSet(SmallSet &&) = default;
 
   template  SmallSet(IterT Begin, IterT End) {
-this->insert(Begin, End);
+insert(Begin, End);
   }
 
   template 
   explicit SmallSet(const iterator_range &R) {
-this->insert(R.begin(), R.end());
+insert(R.begin(), R.end());
   }
 
   SmallSet(std::initializer_list L) { this->insert(L.begin(), L.end()); }
diff --git a/llvm/unittests/ADT/SmallSetTest.cpp 
b/llvm/unittests/ADT/SmallSetTest.cpp
index 8219bf6f4b4c55..2feb0b1feb421b 100644
--- a/llvm/unittests/ADT/SmallSetTest.cpp
+++ b/llvm/unittests/ADT/SmallSetTest.cpp
@@ -12,49 +12,44 @@
 
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include 
 
 using namespace llvm;
 
 TEST(SmallSetTest, ConstructorIteratorPair) {
-  auto L = {1, 2, 3, 4, 5};
+  std::initializer_list L = {1, 2, 3, 4, 5};
   SmallSet S(std::begin(L), std::end(L));
-  for (int Value : L)
-EXPECT_TRUE(S.contains(Value));
+  EXPECT_THAT(S, testing::UnorderedElementsAreArray(L));
 }
 
 TEST(SmallSet, ConstructorRange) {
-  auto L = {1, 2, 3, 4, 5};
+  std::initializer_list L = {1, 2, 3, 4, 5};
 
   SmallSet S

[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)

2024-09-25 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 027a0602fc3fc547ba89dd71b2c59304119f3bb5 
c04b0c4e26240e2c1a47b9af4974981ab4535305 --extensions cpp,h -- 
flang/include/flang/Semantics/semantics.h 
flang/lib/Frontend/CompilerInvocation.cpp flang/lib/Semantics/semantics.cpp 
flang/tools/bbc/bbc.cpp
```





View the diff from clang-format here.


```diff
diff --git a/flang/include/flang/Semantics/semantics.h 
b/flang/include/flang/Semantics/semantics.h
index 4ea9f9e081..2a326074b3 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -76,9 +76,7 @@ public:
   const common::LanguageFeatureControl &languageFeatures() const {
 return languageFeatures_;
   }
-  const common::LangOptions &langOptions() const {
-return langOpts_;
-  }
+  const common::LangOptions &langOptions() const { return langOpts_; }
   int GetDefaultKind(TypeCategory) const;
   int doublePrecisionKind() const {
 return defaultKinds_.doublePrecisionKind();

```




https://github.com/llvm/llvm-project/pull/110013
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff c04b0c4e26240e2c1a47b9af4974981ab4535305 
3c786ad2a50f146d357d882b0c1d966486f7295f --extensions h,cpp -- 
flang/lib/Semantics/check-omp-structure.cpp 
flang/lib/Semantics/check-omp-structure.h
```





View the diff from clang-format here.


```diff
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 976c159e25..239bc38392 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -187,14 +187,14 @@ bool 
OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
 // Only report it if there is a later version that allows it.
 // If it's not allowed at all, it will be reported by CheckAllowed.
 if (allowedInVersion != 0) {
-  std::string thisVersion{std::to_string(version / 10) + "." +
-  std::to_string(version % 10)};
+  std::string thisVersion{
+  std::to_string(version / 10) + "." + std::to_string(version % 10)};
   std::string goodVersion{std::to_string(allowedInVersion)};
 
   context_.Say(dirCtx.clauseSource,
-   "%s clause is not allowed on directive %s in OpenMP v%s, "
-   "try -fopenmp-version=%d"_err_en_US,
-   clauseName, dirName, thisVersion, allowedInVersion);
+  "%s clause is not allowed on directive %s in OpenMP v%s, "
+  "try -fopenmp-version=%d"_err_en_US,
+  clauseName, dirName, thisVersion, allowedInVersion);
 }
   }
   return CheckAllowed(clause);

```




https://github.com/llvm/llvm-project/pull/110015
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/110015

If there is a clause that is allowed on a given directive in a later version of 
the OpenMP spec, report an error and provide the minimal spec version that 
allows the clause.

The case where a clause is not allowed on a directive at all is already handled 
elsewhere.

From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 17:41:16 -0500
Subject: [PATCH] [flang][OpenMP] Add version checks for clauses

If there is a clause that is allowed on a given directive in a later
version of the OpenMP spec, report an error and provide the minimal
spec version that allows the clause.

The case where a clause is not allowed on a directive at all is already
handled elsewhere.
---
 flang/lib/Semantics/check-omp-structure.cpp   | 93 +--
 flang/lib/Semantics/check-omp-structure.h |  1 +
 flang/test/Lower/OpenMP/atomic-capture.f90|  4 +-
 flang/test/Lower/OpenMP/atomic-read.f90   |  2 +-
 flang/test/Lower/OpenMP/atomic-update.f90 |  4 +-
 flang/test/Lower/OpenMP/atomic-write.f90  |  2 +-
 .../test/Lower/OpenMP/declare-target-data.f90 |  4 +-
 .../declare-target-deferred-marking.f90   |  4 +-
 .../OpenMP/declare-target-func-and-subr.f90   |  4 +-
 ...arget-implicit-func-and-subr-cap-enter.f90 |  8 +-
 ...lare-target-implicit-func-and-subr-cap.f90 |  8 +-
 .../declare-target-implicit-tarop-cap.f90 |  8 +-
 .../Lower/OpenMP/function-filtering-2.f90 | 12 +--
 .../test/Lower/OpenMP/function-filtering.f90  | 12 +--
 .../OpenMP/declare_target-device_type.f90 |  4 +-
 .../Parser/OpenMP/in-reduction-clause.f90 |  4 +-
 flang/test/Parser/OpenMP/order-clause01.f90   |  4 +-
 flang/test/Parser/OpenMP/tile-size.f90|  4 +-
 flang/test/Parser/OpenMP/unroll-full.f90  |  4 +-
 flang/test/Parser/OpenMP/unroll.f90   |  4 +-
 .../Semantics/OpenMP/atomic-hint-clause.f90   |  2 +-
 flang/test/Semantics/OpenMP/atomic01.f90  |  2 +-
 flang/test/Semantics/OpenMP/atomic05.f90  |  2 +-
 .../Semantics/OpenMP/clause-validity01.f90|  2 +-
 .../OpenMP/declarative-directive.f90  |  2 +-
 .../Semantics/OpenMP/declare-target01.f90 |  2 +-
 .../Semantics/OpenMP/declare-target02.f90 |  2 +-
 .../Semantics/OpenMP/declare-target06.f90 |  2 +-
 .../Semantics/OpenMP/device-constructs.f90|  2 +-
 flang/test/Semantics/OpenMP/flush02.f90   |  2 +-
 flang/test/Semantics/OpenMP/if-clause.f90 |  2 +-
 flang/test/Semantics/OpenMP/nontemporal.f90   |  2 +-
 .../test/Semantics/OpenMP/order-clause01.f90  |  2 +-
 .../Semantics/OpenMP/requires-atomic01.f90|  2 +-
 .../Semantics/OpenMP/requires-atomic02.f90|  2 +-
 flang/test/Semantics/OpenMP/requires04.f90|  2 +-
 flang/test/Semantics/OpenMP/requires05.f90|  2 +-
 .../Semantics/OpenMP/simd-nontemporal.f90 |  2 +-
 flang/test/Semantics/OpenMP/target01.f90  |  2 +-
 flang/test/Semantics/OpenMP/taskgroup01.f90   |  2 +-
 .../test/Semantics/OpenMP/use_device_addr.f90 |  2 +-
 .../Semantics/OpenMP/use_device_addr1.f90 |  2 +-
 .../test/Semantics/OpenMP/use_device_ptr1.f90 |  2 +-
 43 files changed, 137 insertions(+), 99 deletions(-)

diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +163,43 @@ class AssociatedLoopChecker {
   std::map constructNamesAndLevels_;
 };
 
+bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
+  unsigned version{context_.langOptions().OpenMPVersion};
+  DirectiveContext &dirCt

[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/110013

The motivation for this is to make OpenMP settings visible in the semantic 
checks (OpenMP version in particular).

From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 15:14:39 -0500
Subject: [PATCH] [flang][Semantics] Add LangOptions to SemanticsContext

The motivation for this is to make OpenMP settings visible
in the semantic checks (OpenMP version in particular).
---
 flang/include/flang/Semantics/semantics.h | 10 --
 flang/lib/Frontend/CompilerInvocation.cpp |  3 ++-
 flang/lib/Semantics/semantics.cpp |  3 ++-
 flang/tools/bbc/bbc.cpp   | 18 +-
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Semantics/semantics.h 
b/flang/include/flang/Semantics/semantics.h
index e73f9d2e85d589..4ea9f9e081b701 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -12,6 +12,7 @@
 #include "scope.h"
 #include "symbol.h"
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Evaluate/common.h"
 #include "flang/Evaluate/intrinsics.h"
 #include "flang/Evaluate/target.h"
@@ -65,7 +66,8 @@ using ConstructStack = std::vector;
 class SemanticsContext {
 public:
   SemanticsContext(const common::IntrinsicTypeDefaultKinds &,
-  const common::LanguageFeatureControl &, parser::AllCookedSources &);
+  const common::LanguageFeatureControl &, const common::LangOptions &,
+  parser::AllCookedSources &);
   ~SemanticsContext();
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds() const {
@@ -73,7 +75,10 @@ class SemanticsContext {
   }
   const common::LanguageFeatureControl &languageFeatures() const {
 return languageFeatures_;
-  };
+  }
+  const common::LangOptions &langOptions() const {
+return langOpts_;
+  }
   int GetDefaultKind(TypeCategory) const;
   int doublePrecisionKind() const {
 return defaultKinds_.doublePrecisionKind();
@@ -273,6 +278,7 @@ class SemanticsContext {
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds_;
   const common::LanguageFeatureControl &languageFeatures_;
+  const common::LangOptions &langOpts_;
   parser::AllCookedSources &allCookedSources_;
   std::optional location_;
   std::vector searchDirectories_;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 52ca9f61c56f74..05b03ba9ebdf30 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx(
   auto &fortranOptions = getFortranOpts();
 
   auto semanticsContext = std::make_unique(
-  getDefaultKinds(), fortranOptions.features, allCookedSources);
+  getDefaultKinds(), fortranOptions.features, getLangOpts(),
+  allCookedSources);
 
   semanticsContext->set_moduleDirectory(getModuleDir())
   .set_searchDirectories(fortranOptions.searchDirectories)
diff --git a/flang/lib/Semantics/semantics.cpp 
b/flang/lib/Semantics/semantics.cpp
index 8592d1e5d6217e..1f2980b07b3e0e 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -348,9 +348,10 @@ class CommonBlockMap {
 SemanticsContext::SemanticsContext(
 const common::IntrinsicTypeDefaultKinds &defaultKinds,
 const common::LanguageFeatureControl &languageFeatures,
+const common::LangOptions &langOpts,
 parser::AllCookedSources &allCookedSources)
 : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures},
-  allCookedSources_{allCookedSources},
+  langOpts_{langOpts}, allCookedSources_{allCookedSources},
   intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)},
   globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
Scope::Kind::IntrinsicModules, nullptr)},
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index dcff4503f16571..2a976d5a52fae6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -15,6 +15,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Common/OpenMP-features.h"
 #include "flang/Common/Version.h"
 #include "flang/Common/default-kinds.h"
@@ -507,6 +508,21 @@ int main(int argc, char **argv) {
   options.predefinitions.emplace_back(
   "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING});
 
+  Fortran::common::LangOptions langOpts;
+  langOpts.NoGPULib = setNoGPULib;
+  langOpts.OpenMPVersion = setOpenMPVersion;
+  langOpts.OpenMPIsTargetDevice = enableOpenMPDevice;
+  langOpts.OpenMPIsGPU = enableOpenMPGPU;
+  langOpts.OpenMPForceUSM = enableOpenMPForceUSM;
+  langOpts.OpenMPTargetDebug = setOpenMPTargetDebug;
+  langOpts.Op

[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-flang-semantics

@llvm/pr-subscribers-flang-driver

Author: Krzysztof Parzyszek (kparzysz)


Changes

The motivation for this is to make OpenMP settings visible in the semantic 
checks (OpenMP version in particular).

---
Full diff: https://github.com/llvm/llvm-project/pull/110013.diff


4 Files Affected:

- (modified) flang/include/flang/Semantics/semantics.h (+8-2) 
- (modified) flang/lib/Frontend/CompilerInvocation.cpp (+2-1) 
- (modified) flang/lib/Semantics/semantics.cpp (+2-1) 
- (modified) flang/tools/bbc/bbc.cpp (+17-1) 


``diff
diff --git a/flang/include/flang/Semantics/semantics.h 
b/flang/include/flang/Semantics/semantics.h
index e73f9d2e85d589..4ea9f9e081b701 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -12,6 +12,7 @@
 #include "scope.h"
 #include "symbol.h"
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Evaluate/common.h"
 #include "flang/Evaluate/intrinsics.h"
 #include "flang/Evaluate/target.h"
@@ -65,7 +66,8 @@ using ConstructStack = std::vector;
 class SemanticsContext {
 public:
   SemanticsContext(const common::IntrinsicTypeDefaultKinds &,
-  const common::LanguageFeatureControl &, parser::AllCookedSources &);
+  const common::LanguageFeatureControl &, const common::LangOptions &,
+  parser::AllCookedSources &);
   ~SemanticsContext();
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds() const {
@@ -73,7 +75,10 @@ class SemanticsContext {
   }
   const common::LanguageFeatureControl &languageFeatures() const {
 return languageFeatures_;
-  };
+  }
+  const common::LangOptions &langOptions() const {
+return langOpts_;
+  }
   int GetDefaultKind(TypeCategory) const;
   int doublePrecisionKind() const {
 return defaultKinds_.doublePrecisionKind();
@@ -273,6 +278,7 @@ class SemanticsContext {
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds_;
   const common::LanguageFeatureControl &languageFeatures_;
+  const common::LangOptions &langOpts_;
   parser::AllCookedSources &allCookedSources_;
   std::optional location_;
   std::vector searchDirectories_;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 52ca9f61c56f74..05b03ba9ebdf30 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx(
   auto &fortranOptions = getFortranOpts();
 
   auto semanticsContext = std::make_unique(
-  getDefaultKinds(), fortranOptions.features, allCookedSources);
+  getDefaultKinds(), fortranOptions.features, getLangOpts(),
+  allCookedSources);
 
   semanticsContext->set_moduleDirectory(getModuleDir())
   .set_searchDirectories(fortranOptions.searchDirectories)
diff --git a/flang/lib/Semantics/semantics.cpp 
b/flang/lib/Semantics/semantics.cpp
index 8592d1e5d6217e..1f2980b07b3e0e 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -348,9 +348,10 @@ class CommonBlockMap {
 SemanticsContext::SemanticsContext(
 const common::IntrinsicTypeDefaultKinds &defaultKinds,
 const common::LanguageFeatureControl &languageFeatures,
+const common::LangOptions &langOpts,
 parser::AllCookedSources &allCookedSources)
 : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures},
-  allCookedSources_{allCookedSources},
+  langOpts_{langOpts}, allCookedSources_{allCookedSources},
   intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)},
   globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
Scope::Kind::IntrinsicModules, nullptr)},
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index dcff4503f16571..2a976d5a52fae6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -15,6 +15,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Common/OpenMP-features.h"
 #include "flang/Common/Version.h"
 #include "flang/Common/default-kinds.h"
@@ -507,6 +508,21 @@ int main(int argc, char **argv) {
   options.predefinitions.emplace_back(
   "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING});
 
+  Fortran::common::LangOptions langOpts;
+  langOpts.NoGPULib = setNoGPULib;
+  langOpts.OpenMPVersion = setOpenMPVersion;
+  langOpts.OpenMPIsTargetDevice = enableOpenMPDevice;
+  langOpts.OpenMPIsGPU = enableOpenMPGPU;
+  langOpts.OpenMPForceUSM = enableOpenMPForceUSM;
+  langOpts.OpenMPTargetDebug = setOpenMPTargetDebug;
+  langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription;
+  langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscription;
+  langOpts.OpenMPNoThreadState = setOpenMPNoThreadState;
+  langOpts.OpenMPNoNestedParallelism = setOpe

[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Krzysztof Parzyszek (kparzysz)


Changes

If there is a clause that is allowed on a given directive in a later version of 
the OpenMP spec, report an error and provide the minimal spec version that 
allows the clause.

The case where a clause is not allowed on a directive at all is already handled 
elsewhere.

---

Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/110015.diff


43 Files Affected:

- (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+1) 
- (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 
(+4-4) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) 
- (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 
(+4-4) 
- (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) 
- (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) 
- (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) 
- (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) 


``diff
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +163,

[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Krzysztof Parzyszek (kparzysz)


Changes

If there is a clause that is allowed on a given directive in a later version of 
the OpenMP spec, report an error and provide the minimal spec version that 
allows the clause.

The case where a clause is not allowed on a directive at all is already handled 
elsewhere.

---

Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/110015.diff


43 Files Affected:

- (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+1) 
- (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 
(+4-4) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) 
- (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 
(+4-4) 
- (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) 
- (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) 
- (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) 
- (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) 


```diff
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +1

[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-flang-semantics

@llvm/pr-subscribers-flang-parser

Author: Krzysztof Parzyszek (kparzysz)


Changes

If there is a clause that is allowed on a given directive in a later version of 
the OpenMP spec, report an error and provide the minimal spec version that 
allows the clause.

The case where a clause is not allowed on a directive at all is already handled 
elsewhere.

---

Patch is 41.30 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/110015.diff


43 Files Affected:

- (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-28) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+1) 
- (modified) flang/test/Lower/OpenMP/atomic-capture.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-read.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/atomic-update.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/atomic-write.f90 (+1-1) 
- (modified) flang/test/Lower/OpenMP/declare-target-data.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-deferred-marking.f90 (+2-2) 
- (modified) flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 (+2-2) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 
(+4-4) 
- (modified) 
flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 (+4-4) 
- (modified) flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 
(+4-4) 
- (modified) flang/test/Lower/OpenMP/function-filtering-2.f90 (+6-6) 
- (modified) flang/test/Lower/OpenMP/function-filtering.f90 (+6-6) 
- (modified) flang/test/Parser/OpenMP/declare_target-device_type.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/in-reduction-clause.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/order-clause01.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/tile-size.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll-full.f90 (+2-2) 
- (modified) flang/test/Parser/OpenMP/unroll.f90 (+2-2) 
- (modified) flang/test/Semantics/OpenMP/atomic-hint-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/atomic05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/clause-validity01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declarative-directive.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/declare-target06.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/device-constructs.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/flush02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/if-clause.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/order-clause01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires-atomic02.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires04.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/requires05.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/simd-nontemporal.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/target01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/taskgroup01.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_addr1.f90 (+1-1) 
- (modified) flang/test/Semantics/OpenMP/use_device_ptr1.f90 (+1-1) 


```diff
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the valid

[llvm-branch-commits] [clang] release/19.x: [clang-scan-deps] Don't inspect Args[0] as an option (#109050) (PR #109865)

2024-09-25 Thread Jan Svoboda via llvm-branch-commits
Martin Storsjö ,
Martin Storsjö ,
Martin Storsjö
Message-ID:
In-Reply-To: 


https://github.com/jansvoboda11 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/109865
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/110013

>From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 15:14:39 -0500
Subject: [PATCH] [flang][Semantics] Add LangOptions to SemanticsContext

The motivation for this is to make OpenMP settings visible
in the semantic checks (OpenMP version in particular).
---
 flang/include/flang/Semantics/semantics.h | 10 --
 flang/lib/Frontend/CompilerInvocation.cpp |  3 ++-
 flang/lib/Semantics/semantics.cpp |  3 ++-
 flang/tools/bbc/bbc.cpp   | 18 +-
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Semantics/semantics.h 
b/flang/include/flang/Semantics/semantics.h
index e73f9d2e85d589..4ea9f9e081b701 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -12,6 +12,7 @@
 #include "scope.h"
 #include "symbol.h"
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Evaluate/common.h"
 #include "flang/Evaluate/intrinsics.h"
 #include "flang/Evaluate/target.h"
@@ -65,7 +66,8 @@ using ConstructStack = std::vector<ConstructNode>;
 class SemanticsContext {
 public:
   SemanticsContext(const common::IntrinsicTypeDefaultKinds &,
-  const common::LanguageFeatureControl &, parser::AllCookedSources &);
+  const common::LanguageFeatureControl &, const common::LangOptions &,
+  parser::AllCookedSources &);
   ~SemanticsContext();
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds() const {
@@ -73,7 +75,10 @@ class SemanticsContext {
   }
   const common::LanguageFeatureControl &languageFeatures() const {
 return languageFeatures_;
-  };
+  }
+  const common::LangOptions &langOptions() const {
+return langOpts_;
+  }
   int GetDefaultKind(TypeCategory) const;
   int doublePrecisionKind() const {
 return defaultKinds_.doublePrecisionKind();
@@ -273,6 +278,7 @@ class SemanticsContext {
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds_;
   const common::LanguageFeatureControl &languageFeatures_;
+  const common::LangOptions &langOpts_;
   parser::AllCookedSources &allCookedSources_;
   std::optional<parser::CharBlock> location_;
   std::vector<std::string> searchDirectories_;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 52ca9f61c56f74..05b03ba9ebdf30 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx(
   auto &fortranOptions = getFortranOpts();
 
   auto semanticsContext = std::make_unique<semantics::SemanticsContext>(
-  getDefaultKinds(), fortranOptions.features, allCookedSources);
+  getDefaultKinds(), fortranOptions.features, getLangOpts(),
+  allCookedSources);
 
   semanticsContext->set_moduleDirectory(getModuleDir())
   .set_searchDirectories(fortranOptions.searchDirectories)
diff --git a/flang/lib/Semantics/semantics.cpp 
b/flang/lib/Semantics/semantics.cpp
index 8592d1e5d6217e..1f2980b07b3e0e 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -348,9 +348,10 @@ class CommonBlockMap {
 SemanticsContext::SemanticsContext(
 const common::IntrinsicTypeDefaultKinds &defaultKinds,
 const common::LanguageFeatureControl &languageFeatures,
+const common::LangOptions &langOpts,
 parser::AllCookedSources &allCookedSources)
 : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures},
-  allCookedSources_{allCookedSources},
+  langOpts_{langOpts}, allCookedSources_{allCookedSources},
   intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)},
   globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
Scope::Kind::IntrinsicModules, nullptr)},
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index dcff4503f16571..2a976d5a52fae6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -15,6 +15,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Common/OpenMP-features.h"
 #include "flang/Common/Version.h"
 #include "flang/Common/default-kinds.h"
@@ -507,6 +508,21 @@ int main(int argc, char **argv) {
   options.predefinitions.emplace_back(
   "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING});
 
+  Fortran::common::LangOptions langOpts;
+  langOpts.NoGPULib = setNoGPULib;
+  langOpts.OpenMPVersion = setOpenMPVersion;
+  langOpts.OpenMPIsTargetDevice = enableOpenMPDevice;
+  langOpts.OpenMPIsGPU = enableOpenMPGPU;
+  langOpts.OpenMPForceUSM = enableOpenMPForceUSM;
+  langOpts.OpenMPTargetDebug = setOpenMPTargetDebug;
+  langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription;
+  langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscription

[llvm-branch-commits] [flang] [flang][Semantics] Add LangOptions to SemanticsContext (PR #110013)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/110013

>From c04b0c4e26240e2c1a47b9af4974981ab4535305 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 15:14:39 -0500
Subject: [PATCH 1/2] [flang][Semantics] Add LangOptions to SemanticsContext

The motivation for this is to make OpenMP settings visible
in the semantic checks (OpenMP version in particular).
---
 flang/include/flang/Semantics/semantics.h | 10 --
 flang/lib/Frontend/CompilerInvocation.cpp |  3 ++-
 flang/lib/Semantics/semantics.cpp |  3 ++-
 flang/tools/bbc/bbc.cpp   | 18 +-
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Semantics/semantics.h 
b/flang/include/flang/Semantics/semantics.h
index e73f9d2e85d589..4ea9f9e081b701 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -12,6 +12,7 @@
 #include "scope.h"
 #include "symbol.h"
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Evaluate/common.h"
 #include "flang/Evaluate/intrinsics.h"
 #include "flang/Evaluate/target.h"
@@ -65,7 +66,8 @@ using ConstructStack = std::vector<ConstructNode>;
 class SemanticsContext {
 public:
   SemanticsContext(const common::IntrinsicTypeDefaultKinds &,
-  const common::LanguageFeatureControl &, parser::AllCookedSources &);
+  const common::LanguageFeatureControl &, const common::LangOptions &,
+  parser::AllCookedSources &);
   ~SemanticsContext();
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds() const {
@@ -73,7 +75,10 @@ class SemanticsContext {
   }
   const common::LanguageFeatureControl &languageFeatures() const {
 return languageFeatures_;
-  };
+  }
+  const common::LangOptions &langOptions() const {
+return langOpts_;
+  }
   int GetDefaultKind(TypeCategory) const;
   int doublePrecisionKind() const {
 return defaultKinds_.doublePrecisionKind();
@@ -273,6 +278,7 @@ class SemanticsContext {
 
   const common::IntrinsicTypeDefaultKinds &defaultKinds_;
   const common::LanguageFeatureControl &languageFeatures_;
+  const common::LangOptions &langOpts_;
   parser::AllCookedSources &allCookedSources_;
   std::optional<parser::CharBlock> location_;
   std::vector<std::string> searchDirectories_;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp 
b/flang/lib/Frontend/CompilerInvocation.cpp
index 52ca9f61c56f74..05b03ba9ebdf30 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1531,7 +1531,8 @@ CompilerInvocation::getSemanticsCtx(
   auto &fortranOptions = getFortranOpts();
 
   auto semanticsContext = std::make_unique<semantics::SemanticsContext>(
-  getDefaultKinds(), fortranOptions.features, allCookedSources);
+  getDefaultKinds(), fortranOptions.features, getLangOpts(),
+  allCookedSources);
 
   semanticsContext->set_moduleDirectory(getModuleDir())
   .set_searchDirectories(fortranOptions.searchDirectories)
diff --git a/flang/lib/Semantics/semantics.cpp 
b/flang/lib/Semantics/semantics.cpp
index 8592d1e5d6217e..1f2980b07b3e0e 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -348,9 +348,10 @@ class CommonBlockMap {
 SemanticsContext::SemanticsContext(
 const common::IntrinsicTypeDefaultKinds &defaultKinds,
 const common::LanguageFeatureControl &languageFeatures,
+const common::LangOptions &langOpts,
 parser::AllCookedSources &allCookedSources)
 : defaultKinds_{defaultKinds}, languageFeatures_{languageFeatures},
-  allCookedSources_{allCookedSources},
+  langOpts_{langOpts}, allCookedSources_{allCookedSources},
   intrinsics_{evaluate::IntrinsicProcTable::Configure(defaultKinds_)},
   globalScope_{*this}, intrinsicModulesScope_{globalScope_.MakeScope(
Scope::Kind::IntrinsicModules, nullptr)},
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index dcff4503f16571..2a976d5a52fae6 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -15,6 +15,7 @@
 
//===--===//
 
 #include "flang/Common/Fortran-features.h"
+#include "flang/Common/LangOptions.h"
 #include "flang/Common/OpenMP-features.h"
 #include "flang/Common/Version.h"
 #include "flang/Common/default-kinds.h"
@@ -507,6 +508,21 @@ int main(int argc, char **argv) {
   options.predefinitions.emplace_back(
   "__flang_patchlevel__"s, std::string{FLANG_VERSION_PATCHLEVEL_STRING});
 
+  Fortran::common::LangOptions langOpts;
+  langOpts.NoGPULib = setNoGPULib;
+  langOpts.OpenMPVersion = setOpenMPVersion;
+  langOpts.OpenMPIsTargetDevice = enableOpenMPDevice;
+  langOpts.OpenMPIsGPU = enableOpenMPGPU;
+  langOpts.OpenMPForceUSM = enableOpenMPForceUSM;
+  langOpts.OpenMPTargetDebug = setOpenMPTargetDebug;
+  langOpts.OpenMPThreadSubscription = setOpenMPThreadSubscription;
+  langOpts.OpenMPTeamSubscription = setOpenMPTeamSubscrip

[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/110015

>From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 17:41:16 -0500
Subject: [PATCH] [flang][OpenMP] Add version checks for clauses

If there is a clause that is allowed on a given directive in a later
version of the OpenMP spec, report an error and provide the minimal
spec version that allows the clause.

The case where a clause is not allowed on a directive at all is already
handled elsewhere.
---
 flang/lib/Semantics/check-omp-structure.cpp   | 93 +--
 flang/lib/Semantics/check-omp-structure.h |  1 +
 flang/test/Lower/OpenMP/atomic-capture.f90|  4 +-
 flang/test/Lower/OpenMP/atomic-read.f90   |  2 +-
 flang/test/Lower/OpenMP/atomic-update.f90 |  4 +-
 flang/test/Lower/OpenMP/atomic-write.f90  |  2 +-
 .../test/Lower/OpenMP/declare-target-data.f90 |  4 +-
 .../declare-target-deferred-marking.f90   |  4 +-
 .../OpenMP/declare-target-func-and-subr.f90   |  4 +-
 ...arget-implicit-func-and-subr-cap-enter.f90 |  8 +-
 ...lare-target-implicit-func-and-subr-cap.f90 |  8 +-
 .../declare-target-implicit-tarop-cap.f90 |  8 +-
 .../Lower/OpenMP/function-filtering-2.f90 | 12 +--
 .../test/Lower/OpenMP/function-filtering.f90  | 12 +--
 .../OpenMP/declare_target-device_type.f90 |  4 +-
 .../Parser/OpenMP/in-reduction-clause.f90 |  4 +-
 flang/test/Parser/OpenMP/order-clause01.f90   |  4 +-
 flang/test/Parser/OpenMP/tile-size.f90|  4 +-
 flang/test/Parser/OpenMP/unroll-full.f90  |  4 +-
 flang/test/Parser/OpenMP/unroll.f90   |  4 +-
 .../Semantics/OpenMP/atomic-hint-clause.f90   |  2 +-
 flang/test/Semantics/OpenMP/atomic01.f90  |  2 +-
 flang/test/Semantics/OpenMP/atomic05.f90  |  2 +-
 .../Semantics/OpenMP/clause-validity01.f90|  2 +-
 .../OpenMP/declarative-directive.f90  |  2 +-
 .../Semantics/OpenMP/declare-target01.f90 |  2 +-
 .../Semantics/OpenMP/declare-target02.f90 |  2 +-
 .../Semantics/OpenMP/declare-target06.f90 |  2 +-
 .../Semantics/OpenMP/device-constructs.f90|  2 +-
 flang/test/Semantics/OpenMP/flush02.f90   |  2 +-
 flang/test/Semantics/OpenMP/if-clause.f90 |  2 +-
 flang/test/Semantics/OpenMP/nontemporal.f90   |  2 +-
 .../test/Semantics/OpenMP/order-clause01.f90  |  2 +-
 .../Semantics/OpenMP/requires-atomic01.f90|  2 +-
 .../Semantics/OpenMP/requires-atomic02.f90|  2 +-
 flang/test/Semantics/OpenMP/requires04.f90|  2 +-
 flang/test/Semantics/OpenMP/requires05.f90|  2 +-
 .../Semantics/OpenMP/simd-nontemporal.f90 |  2 +-
 flang/test/Semantics/OpenMP/target01.f90  |  2 +-
 flang/test/Semantics/OpenMP/taskgroup01.f90   |  2 +-
 .../test/Semantics/OpenMP/use_device_addr.f90 |  2 +-
 .../Semantics/OpenMP/use_device_addr1.f90 |  2 +-
 .../test/Semantics/OpenMP/use_device_ptr1.f90 |  2 +-
 43 files changed, 137 insertions(+), 99 deletions(-)

diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +163,43 @@ class AssociatedLoopChecker {
   std::map<std::string, std::int64_t> constructNamesAndLevels_;
 };
 
+bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
+  unsigned version{context_.langOptions().OpenMPVersion};
+  DirectiveContext &dirCtx = GetContext();
+  llvm::omp::Directive dir{dirCtx.directive};
+
+  if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) {
+unsigned allowedInVersion{[&] {
+  for (unsigned v : {45, 50, 51, 52, 60}) {
+if (v <= version) {
+  conti

[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Normalize representation of entry block arg-defining clauses (PR #109809)

2024-09-25 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

Thanks for the cleanup!

https://github.com/llvm/llvm-project/pull/109809
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-25 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

LGTM, thanks!

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/110015

>From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 17:41:16 -0500
Subject: [PATCH 1/3] [flang][OpenMP] Add version checks for clauses

If there is a clause that is allowed on a given directive in a later
version of the OpenMP spec, report an error and provide the minimal
spec version that allows the clause.

The case where a clause is not allowed on a directive at all is already
handled elsewhere.
---
 flang/lib/Semantics/check-omp-structure.cpp   | 93 +--
 flang/lib/Semantics/check-omp-structure.h |  1 +
 flang/test/Lower/OpenMP/atomic-capture.f90|  4 +-
 flang/test/Lower/OpenMP/atomic-read.f90   |  2 +-
 flang/test/Lower/OpenMP/atomic-update.f90 |  4 +-
 flang/test/Lower/OpenMP/atomic-write.f90  |  2 +-
 .../test/Lower/OpenMP/declare-target-data.f90 |  4 +-
 .../declare-target-deferred-marking.f90   |  4 +-
 .../OpenMP/declare-target-func-and-subr.f90   |  4 +-
 ...arget-implicit-func-and-subr-cap-enter.f90 |  8 +-
 ...lare-target-implicit-func-and-subr-cap.f90 |  8 +-
 .../declare-target-implicit-tarop-cap.f90 |  8 +-
 .../Lower/OpenMP/function-filtering-2.f90 | 12 +--
 .../test/Lower/OpenMP/function-filtering.f90  | 12 +--
 .../OpenMP/declare_target-device_type.f90 |  4 +-
 .../Parser/OpenMP/in-reduction-clause.f90 |  4 +-
 flang/test/Parser/OpenMP/order-clause01.f90   |  4 +-
 flang/test/Parser/OpenMP/tile-size.f90|  4 +-
 flang/test/Parser/OpenMP/unroll-full.f90  |  4 +-
 flang/test/Parser/OpenMP/unroll.f90   |  4 +-
 .../Semantics/OpenMP/atomic-hint-clause.f90   |  2 +-
 flang/test/Semantics/OpenMP/atomic01.f90  |  2 +-
 flang/test/Semantics/OpenMP/atomic05.f90  |  2 +-
 .../Semantics/OpenMP/clause-validity01.f90|  2 +-
 .../OpenMP/declarative-directive.f90  |  2 +-
 .../Semantics/OpenMP/declare-target01.f90 |  2 +-
 .../Semantics/OpenMP/declare-target02.f90 |  2 +-
 .../Semantics/OpenMP/declare-target06.f90 |  2 +-
 .../Semantics/OpenMP/device-constructs.f90|  2 +-
 flang/test/Semantics/OpenMP/flush02.f90   |  2 +-
 flang/test/Semantics/OpenMP/if-clause.f90 |  2 +-
 flang/test/Semantics/OpenMP/nontemporal.f90   |  2 +-
 .../test/Semantics/OpenMP/order-clause01.f90  |  2 +-
 .../Semantics/OpenMP/requires-atomic01.f90|  2 +-
 .../Semantics/OpenMP/requires-atomic02.f90|  2 +-
 flang/test/Semantics/OpenMP/requires04.f90|  2 +-
 flang/test/Semantics/OpenMP/requires05.f90|  2 +-
 .../Semantics/OpenMP/simd-nontemporal.f90 |  2 +-
 flang/test/Semantics/OpenMP/target01.f90  |  2 +-
 flang/test/Semantics/OpenMP/taskgroup01.f90   |  2 +-
 .../test/Semantics/OpenMP/use_device_addr.f90 |  2 +-
 .../Semantics/OpenMP/use_device_addr1.f90 |  2 +-
 .../test/Semantics/OpenMP/use_device_ptr1.f90 |  2 +-
 43 files changed, 137 insertions(+), 99 deletions(-)

diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +163,43 @@ class AssociatedLoopChecker {
   std::map<std::string, std::int64_t> constructNamesAndLevels_;
 };
 
+bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
+  unsigned version{context_.langOptions().OpenMPVersion};
+  DirectiveContext &dirCtx = GetContext();
+  llvm::omp::Directive dir{dirCtx.directive};
+
+  if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) {
+unsigned allowedInVersion{[&] {
+  for (unsigned v : {45, 50, 51, 52, 60}) {
+if (v <= version) {
+  c

[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke updated 
https://github.com/llvm/llvm-project/pull/109939

>From 646d2d1a54ca0ac3bc312f4038826fb431890bf6 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 24 Sep 2024 11:41:18 +
Subject: [PATCH] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM

---
 .../llvm/Passes/MachinePassRegistry.def   |  4 +-
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  6 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  6 +-
 .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 ---
 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++
 .../AMDGPU/si-pre-allocate-wwm-regs.mir   | 26 
 .../si-pre-allocate-wwm-sgpr-spills.mir   | 21 +++
 7 files changed, 124 insertions(+), 29 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir

diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bdc56ca03f392a..72e2cf232bfd17 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -96,6 +96,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass())
 // computed. (We still either need to regenerate kill flags after regalloc, or
 // preferably fix the scavenger to not depend on them).
 MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis())
+MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())
 MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-block-freq", 
MachineBlockFrequencyAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-branch-prob",
@@ -122,8 +123,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 // MachineRegionInfoPassAnalysis())
 // MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics",
 // MachineTraceMetricsAnalysis()) MACHINE_FUNCTION_ANALYSIS("reaching-def",
-// ReachingDefAnalysisAnalysis()) MACHINE_FUNCTION_ANALYSIS("live-reg-matrix",
-// LiveRegMatrixAnalysis()) MACHINE_FUNCTION_ANALYSIS("gc-analysis",
+// ReachingDefAnalysisAnalysis())  MACHINE_FUNCTION_ANALYSIS("gc-analysis",
 // GCMachineCodeAnalysisPass())
 #undef MACHINE_FUNCTION_ANALYSIS
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..c0fd5e4625895a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
 FunctionPass *createLowerWWMCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
 FunctionPass *createSIFormMemoryClausesPass();
 
 FunctionPass *createSIPostRABundlerPass();
@@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
 
 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
 extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fdee0819b502..a39293863d1c54 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -461,7 +461,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
-  initializeSIPreAllocateWWMRegsPass(*PR);
+  initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
   initializeGCNCreateVOPDPass(*PR);
@@ -1443,7 +1443,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(false));
 
@@ -1467,7 +1467,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(true));
 
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp 
b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 0635cab7b872e2..c1d7a464a81537 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
 //
 
//===

[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)

2024-09-25 Thread Mark de Wever via llvm-branch-commits

https://github.com/mordante updated 
https://github.com/llvm/llvm-project/pull/108990

>From d435a3118ffe1cca91eff7eeea19f4bc243384c4 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Sat, 30 Mar 2024 17:35:56 +0100
Subject: [PATCH] [libc++][format][3/3] Improves formatting performance.

This changes the __output_buffer to a new structure. This improves the
performance of std::format, std::format_to, std::format_to_n, and
std::formatted_size.
---
 libcxx/include/__format/buffer.h  | 616 ++
 libcxx/include/__format/format_functions.h|  29 +-
 .../test/libcxx/transitive_includes/cxx03.csv |  18 -
 .../test/libcxx/transitive_includes/cxx11.csv |  18 -
 .../test/libcxx/transitive_includes/cxx14.csv |  18 -
 .../test/libcxx/transitive_includes/cxx17.csv |   8 -
 .../format/format.functions/format_tests.h|   2 +-
 7 files changed, 352 insertions(+), 357 deletions(-)

diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
index 8598f0a1c03957..a0caaccb47c2d2 100644
--- a/libcxx/include/__format/buffer.h
+++ b/libcxx/include/__format/buffer.h
@@ -14,6 +14,7 @@
 #include <__algorithm/fill_n.h>
 #include <__algorithm/max.h>
 #include <__algorithm/min.h>
+#include <__algorithm/ranges_copy.h>
 #include <__algorithm/ranges_copy_n.h>
 #include <__algorithm/transform.h>
 #include <__algorithm/unwrap_iter.h>
@@ -29,6 +30,7 @@
 #include <__iterator/wrap_iter.h>
 #include <__memory/addressof.h>
 #include <__memory/allocate_at_least.h>
+#include <__memory/allocator.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/construct_at.h>
 #include <__memory/ranges_construct_at.h>
@@ -38,6 +40,7 @@
 #include <__utility/exception_guard.h>
 #include <__utility/move.h>
 #include 
+#include 
 #include 
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -53,24 +56,150 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __format {
 
+// A helper to limit the total size of code units written.
+class _LIBCPP_HIDE_FROM_ABI __max_output_size {
+public:
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __max_output_size(size_t 
__max_size) : __max_size_{__max_size} {}
+
+  // This function adjusts the size of a (bulk) write operations. It ensures 
the
+  // number of code units written by a __output_buffer never exceeds
+  // __max_size_ code units.
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __write_request(size_t 
__code_units) {
+size_t __result =
+__code_units_written_ < __max_size_ ? std::min(__code_units, 
__max_size_ - __code_units_written_) : 0;
+__code_units_written_ += __code_units;
+return __result;
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __code_units_written() const 
noexcept { return __code_units_written_; }
+
+private:
+  size_t __max_size_;
+  // The code units that would have been written if there was no limit.
+  // format_to_n returns this value.
+  size_t __code_units_written_{0};
+};
+
 /// A "buffer" that handles writing to the proper iterator.
 ///
 /// This helper is used together with the @ref back_insert_iterator to offer
 /// type-erasure for the formatting functions. This reduces the number to
 /// template instantiations.
+///
+/// The design is the following:
+/// - There is an external object that connects the buffer to the output.
+/// - This buffer object:
+///   - inherits publicly from this class.
+///   - has a static or dynamic buffer.
+///   - has a static member function to make space in its buffer for write
+/// operations. This can be done by increasing the size of the internal
+/// buffer or by writing the contents of the buffer to the output iterator.
+///
+/// This member function is a constructor argument, so its name is not
+/// fixed. The code uses the name __prepare_write.
+/// - The number of output code units can be limited by a __max_output_size
+///   object. This is used in format_to_n. This object:
+///   - Contains the maximum number of code units to be written.
+///   - Contains the number of code units that are requested to be written.
+/// This number is returned to the user of format_to_n.
+///   - The write functions call the object's __write_request member function.
+/// This function:
+/// - Updates the number of code units that are requested to be written.
+/// - Returns the number of code units that can be written without
+///   exceeding the maximum number of code units to be written.
+///
+/// Documentation for the buffer usage members:
+/// - __ptr_
+///   The start of the buffer.
+/// - __capacity_
+///   The number of code units that can be written. This means
+///   [__ptr_, __ptr_ + __capacity_) is a valid range to write to.
+/// - __size_
+///   The number of code units written in the buffer. The next code unit will
+///   be written at __ptr_ + __size_. This __size_ may NOT contain the total
+///   number of code units written by the __output_buffer. Whether or not it
+///   does depends on the sub-class used. Typically the total number o

[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)

2024-09-25 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne edited 
https://github.com/llvm/llvm-project/pull/108990
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)

2024-09-25 Thread Louis Dionne via llvm-branch-commits


@@ -319,188 +353,222 @@ struct _LIBCPP_TEMPLATE_VIS 
__back_insert_iterator_container
-class _LIBCPP_TEMPLATE_VIS __writer_container {
+// A dynamically growing buffer.
+template <__fmt_char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __allocating_buffer : public 
__output_buffer<_CharT> {
 public:
-  using _CharT = typename _Container::value_type;
+  __allocating_buffer(const __allocating_buffer&)= delete;
+  __allocating_buffer& operator=(const __allocating_buffer&) = delete;
 
-  _LIBCPP_HIDE_FROM_ABI explicit 
__writer_container(back_insert_iterator<_Container> __out_it)
-  : __container_{__out_it.__get_container()} {}
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __allocating_buffer() : 
__allocating_buffer{nullptr} {}
 
-  _LIBCPP_HIDE_FROM_ABI auto __out_it() { return 
std::back_inserter(*__container_); }
+  [[nodiscard]]
+  _LIBCPP_HIDE_FROM_ABI explicit __allocating_buffer(__max_output_size* 
__max_output_size)
+  : __output_buffer<_CharT>{__buffer_, __buffer_size_, __prepare_write, 
__max_output_size} {}
 
-  _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) {
-__container_->insert(__container_->end(), __ptr, __ptr + __n);
+  _LIBCPP_HIDE_FROM_ABI ~__allocating_buffer() {
+if (__ptr_ != __buffer_) {
+  ranges::destroy_n(__ptr_, this->__size());
+  allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, 
this->__capacity());
+}
   }
 
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<_CharT> __view() { 
return {__ptr_, this->__size()}; }
+
 private:
-  _Container* __container_;
-};
+  // At the moment the allocator is hard-coded. There might be reasons to have
+  // an allocator trait in the future. This ensures forward compatibility.
+  using _Alloc = allocator<_CharT>;
+  _LIBCPP_NO_UNIQUE_ADDRESS _Alloc __alloc_;
 
-/// Selects the type of the writer used for the output iterator.
-template 
-class _LIBCPP_TEMPLATE_VIS __writer_selector {
-  using _Container = typename __back_insert_iterator_container<_OutIt>::type;
+  // Since allocating is expensive the class has a small internal buffer. When
+  // its capacity is exceeded a dynamic buffer will be allocated.
+  static constexpr size_t __buffer_size_ = 256;
+  _CharT __buffer_[__buffer_size_];
 
-public:
-  using type =
-  conditional_t,
-__writer_container<_Container>,
-conditional_t<__enable_direct_output<_OutIt, _CharT>,
-  __writer_direct<_OutIt, _CharT>,
-  __writer_iterator<_OutIt, _CharT>>>;
+  _CharT* __ptr_{__buffer_};
+
+  _LIBCPP_HIDE_FROM_ABI void __grow_buffer(size_t __capacity) {
+if (__capacity < __buffer_size_)
+  return;
+
+_LIBCPP_ASSERT_INTERNAL(__capacity > this->__capacity(), "the buffer must 
grow");
+auto __result = std::__allocate_at_least(__alloc_, __capacity);
+auto __guard  = std::__make_exception_guard([&] {
+  allocator_traits<_Alloc>::deallocate(__alloc_, __result.ptr, 
__result.count);
+});
+// This shouldn't throw, but just to be safe. Note that at -O1 this
+// guard is optimized away so there is no runtime overhead.
+new (__result.ptr) _CharT[__result.count];
+std::copy_n(__ptr_, this->__size(), __result.ptr);
+__guard.__complete();
+if (__ptr_ != __buffer_) {
+  ranges::destroy_n(__ptr_, this->__capacity());
+  allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, 
this->__capacity());
+}
+
+__ptr_ = __result.ptr;
+this->__buffer_moved(__ptr_, __result.count);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __prepare_write(size_t __size_hint) {
+__grow_buffer(std::max(this->__capacity() + __size_hint, 
this->__capacity() * 1.6));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI static void __prepare_write(__output_buffer<_CharT>& 
__buffer, size_t __size_hint) {
+
static_cast<__allocating_buffer<_CharT>&>(__buffer).__prepare_write(__size_hint);
+  }
 };
 
-/// The generic formatting buffer.
+// A buffer that directly writes to the underlying buffer.
 template 
-  requires(output_iterator<_OutIt, const _CharT&>)
-class _LIBCPP_TEMPLATE_VIS __format_buffer {
-  using _Storage =
-  conditional_t<__enable_direct_output<_OutIt, _CharT>, 
__direct_storage<_CharT>, __internal_storage<_CharT>>;
-
+class _LIBCPP_TEMPLATE_VIS __direct_iterator_buffer : public 
__output_buffer<_CharT> {
 public:
-  _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it)
-requires(same_as<_Storage, __internal_storage<_CharT>>)
-  : __output_(__storage_.__begin(), __storage_.__buffer_size, this), 
__writer_(std::move(__out_it)) {}
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __direct_iterator_buffer(_OutIt 
__out_it)
+  : __direct_iterator_buffer{__out_it, nullptr} {}
 
-  _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it)
-requires(same_as<_Storage, __direct_storage<_CharT>>)
-  : __output_(std::__unwrap_iter(__out_it), size_t(-1), this), 
__writer_(std::move(__out_it)) 

[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)

2024-09-25 Thread Louis Dionne via llvm-branch-commits


@@ -53,24 +56,150 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __format {
 
+// A helper to limit the total size of code units written.
+class _LIBCPP_HIDE_FROM_ABI __max_output_size {
+public:
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __max_output_size(size_t 
__max_size) : __max_size_{__max_size} {}
+
+  // This function adjusts the size of a (bulk) write operations. It ensures 
the
+  // number of code units written by a __output_buffer never exceeds
+  // __max_size_ code units.
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __write_request(size_t 
__code_units) {
+size_t __result =
+__code_units_written_ < __max_size_ ? std::min(__code_units, 
__max_size_ - __code_units_written_) : 0;
+__code_units_written_ += __code_units;
+return __result;
+  }
+
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __code_units_written() const 
noexcept { return __code_units_written_; }
+
+private:
+  size_t __max_size_;
+  // The code units that would have been written if there was no limit.
+  // format_to_n returns this value.
+  size_t __code_units_written_{0};
+};
+
 /// A "buffer" that handles writing to the proper iterator.
 ///
 /// This helper is used together with the @ref back_insert_iterator to offer
 /// type-erasure for the formatting functions. This reduces the number to
 /// template instantiations.
+///
+/// The design is the following:
+/// - There is an external object that connects the buffer to the output.
+/// - This buffer object:
+///   - inherits publicly from this class.
+///   - has a static or dynamic buffer.
+///   - has a static member function to make space in its buffer for write
+/// operations. This can be done by increasing the size of the internal
+/// buffer or by writing the contents of the buffer to the output iterator.
+///
+/// This member function is a constructor argument, so its name is not
+/// fixed. The code uses the name __prepare_write.
+/// - The number of output code units can be limited by a __max_output_size
+///   object. This is used in format_to_n. This object:
+///   - Contains the maximum number of code units to be written.
+///   - Contains the number of code units that are requested to be written.
+/// This number is returned to the user of format_to_n.
+///   - The write functions call the object's __write_request member function.
+/// This function:
+/// - Updates the number of code units that are requested to be written.
+/// - Returns the number of code units that can be written without
+///   exceeding the maximum number of code units to be written.
+///
+/// Documentation for the buffer usage members:
+/// - __ptr_
+///   The start of the buffer.
+/// - __capacity_
+///   The number of code units that can be written. This means
+///   [__ptr_, __ptr_ + __capacity_) is a valid range to write to.
+/// - __size_
+///   The number of code units written in the buffer. The next code unit will
+///   be written at __ptr_ + __size_. This __size_ may NOT contain the total
+///   number of code units written by the __output_buffer. Whether or not it
+///   does depends on the sub-class used. Typically the total number of code
+///   units written is not interesting. It is interesting for format_to_n which
+///   has its own way to track this number.
+///
+/// Documentation for the buffer modifying buffer operations:

ldionne wrote:

```suggestion
/// Documentation for the modifying buffer operations:
```

https://github.com/llvm/llvm-project/pull/108990
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++][format][3/3] Improves formatting performance. (PR #108990)

2024-09-25 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne approved this pull request.

This LGTM once the comments have been addressed. In particular, we shouldn't 
need to change the transitive includes.

https://github.com/llvm/llvm-project/pull/108990
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Fix the assertion for atomic store with 'ptr' type (PR #109915)

2024-09-25 Thread WÁNG Xuěruì via llvm-branch-commits

https://github.com/xen0n approved this pull request.

fixes loongson-community/discussions#68

https://github.com/llvm/llvm-project/pull/109915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke updated 
https://github.com/llvm/llvm-project/pull/109938

>From 22bb8f0e07088515380c2948ce7b37a041a67e0e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 24 Sep 2024 09:07:04 +
Subject: [PATCH] [NewPM][CodeGen] Port LiveRegMatrix to NPM

---
 llvm/include/llvm/CodeGen/LiveRegMatrix.h | 50 ---
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  4 +-
 llvm/lib/CodeGen/LiveRegMatrix.cpp| 38 ++
 llvm/lib/CodeGen/RegAllocBasic.cpp|  8 +--
 llvm/lib/CodeGen/RegAllocGreedy.cpp   |  8 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp |  6 +--
 .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp|  6 +--
 9 files changed, 88 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h 
b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 2b32308c7c075e..c024ca9c1dc38d 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -37,7 +37,9 @@ class MachineFunction;
 class TargetRegisterInfo;
 class VirtRegMap;
 
-class LiveRegMatrix : public MachineFunctionPass {
+class LiveRegMatrix {
+  friend class LiveRegMatrixWrapperPass;
+  friend class LiveRegMatrixAnalysis;
   const TargetRegisterInfo *TRI = nullptr;
   LiveIntervals *LIS = nullptr;
   VirtRegMap *VRM = nullptr;
@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass {
   unsigned RegMaskVirtReg = 0;
   BitVector RegMaskUsable;
 
-  // MachineFunctionPass boilerplate.
-  void getAnalysisUsage(AnalysisUsage &) const override;
-  bool runOnMachineFunction(MachineFunction &) override;
-  void releaseMemory() override;
+  LiveRegMatrix() = default;
+  void releaseMemory();
 
 public:
-  static char ID;
-
-  LiveRegMatrix();
+  LiveRegMatrix(LiveRegMatrix &&Other)
+  : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag),
+Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)),
+RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg),
+RegMaskUsable(std::move(Other.RegMaskUsable)) {
+Other.TRI = nullptr;
+Other.LIS = nullptr;
+Other.VRM = nullptr;
+  }
+
+  void init(MachineFunction &MF, LiveIntervals *LIS, VirtRegMap *VRM);
 
   
//======//
   // High-level interface.
@@ -159,6 +167,32 @@ class LiveRegMatrix : public MachineFunctionPass {
   Register getOneVReg(unsigned PhysReg) const;
 };
 
+class LiveRegMatrixWrapperPass : public MachineFunctionPass {
+  LiveRegMatrix LRM;
+
+public:
+  static char ID;
+
+  LiveRegMatrixWrapperPass() : MachineFunctionPass(ID) {}
+
+  LiveRegMatrix &getLRM() { return LRM; }
+  const LiveRegMatrix &getLRM() const { return LRM; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void releaseMemory() override;
+};
+
+class LiveRegMatrixAnalysis : public AnalysisInfoMixin {
+  friend AnalysisInfoMixin;
+  static AnalysisKey Key;
+
+public:
+  using Result = LiveRegMatrix;
+
+  LiveRegMatrix run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
+};
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_LIVEREGMATRIX_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 9ce92d7da8700b..8c5607b33096f2 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -156,7 +156,7 @@ void initializeLiveDebugValuesPass(PassRegistry &);
 void initializeLiveDebugVariablesPass(PassRegistry &);
 void initializeLiveIntervalsWrapperPassPass(PassRegistry &);
 void initializeLiveRangeShrinkPass(PassRegistry &);
-void initializeLiveRegMatrixPass(PassRegistry &);
+void initializeLiveRegMatrixWrapperPassPass(PassRegistry &);
 void initializeLiveStacksPass(PassRegistry &);
 void initializeLiveVariablesWrapperPassPass(PassRegistry &);
 void initializeLoadStoreOptPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bdc56ca03f392a..4497c1fce0db69 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -97,6 +97,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass())
 // preferably fix the scavenger to not depend on them).
 MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis())
 MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
+MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-block-freq", 
MachineBlockFrequencyAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-branch-prob",
   MachineBranchProbabilityAnalysis())
@@ -122,8 +123,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 // MachineRegionInfoPa

[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke updated 
https://github.com/llvm/llvm-project/pull/109939

>From 4c5184af67fb5eaeec1eb971421b8a3030b22c76 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 24 Sep 2024 11:41:18 +
Subject: [PATCH] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  6 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  7 ++-
 .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 ---
 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++
 5 files changed, 77 insertions(+), 27 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..c0fd5e4625895a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
 FunctionPass *createLowerWWMCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
 FunctionPass *createSIFormMemoryClausesPass();
 
 FunctionPass *createSIPostRABundlerPass();
@@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
 
 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
 extern char &AMDGPUImageIntrinsicOptimizerID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0ebf34c901c142..174a90f0aa419d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,5 +102,6 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", 
GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
+MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 #undef MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fdee0819b502..9a28c648e2c4ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -41,6 +41,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIPeepholeSDWA.h"
+#include "SIPreAllocateWWMRegs.h"
 #include "SIShrinkInstructions.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -461,7 +462,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
-  initializeSIPreAllocateWWMRegsPass(*PR);
+  initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
   initializeGCNCreateVOPDPass(*PR);
@@ -1443,7 +1444,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(false));
 
@@ -1467,7 +1468,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 
   // Equivalent of PEI for SGPRs.
   addPass(&SILowerSGPRSpillsLegacyID);
-  addPass(&SIPreAllocateWWMRegsID);
+  addPass(&SIPreAllocateWWMRegsLegacyID);
 
   addPass(createVGPRAllocPass(true));
 
diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp 
b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
index 0635cab7b872e2..c1d7a464a81537 100644
--- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -11,6 +11,7 @@
 //
 
//===--===//
 
+#include "SIPreAllocateWWMRegs.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,7 +35,7 @@ static cl::opt
 
 namespace {
 
-class SIPreAllocateWWMRegs : public MachineFunctionPass {
+class SIPreAllocateWWMRegs {
 private:
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
@@ -48,13 +49,21 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
 #ifndef NDEBUG
   void printWWMInfo(const MachineInstr &MI);
 #endif
+  bool processDef(MachineOperand &MO);
+  void rewriteRegs(MachineFunction &MF);
+
+public:
+  SIPreAllocateWWMR

[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass {
   unsigned RegMaskVirtReg = 0;
   BitVector RegMaskUsable;
 
-  // MachineFunctionPass boilerplate.
-  void getAnalysisUsage(AnalysisUsage &) const override;
-  bool runOnMachineFunction(MachineFunction &) override;
-  void releaseMemory() override;
+  LiveRegMatrix() = default;
+  void releaseMemory();
 
 public:
-  static char ID;
-
-  LiveRegMatrix();
+  LiveRegMatrix(LiveRegMatrix &&Other)
+  : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag),
+Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)),
+RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg),
+RegMaskUsable(std::move(Other.RegMaskUsable)) {
+Other.TRI = nullptr;
+Other.LIS = nullptr;
+Other.VRM = nullptr;
+  }
+
+  void init(MachineFunction &MF, LiveIntervals *LIS, VirtRegMap *VRM);

arsenm wrote:

Make these references; they are required anyway.

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass {
   unsigned RegMaskVirtReg = 0;
   BitVector RegMaskUsable;
 
-  // MachineFunctionPass boilerplate.
-  void getAnalysisUsage(AnalysisUsage &) const override;
-  bool runOnMachineFunction(MachineFunction &) override;
-  void releaseMemory() override;
+  LiveRegMatrix() = default;
+  void releaseMemory();
 
 public:
-  static char ID;
-
-  LiveRegMatrix();
+  LiveRegMatrix(LiveRegMatrix &&Other)

arsenm wrote:

Why does this need a move constructor? 

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -159,6 +167,32 @@ class LiveRegMatrix : public MachineFunctionPass {
   Register getOneVReg(unsigned PhysReg) const;
 };
 
+class LiveRegMatrixWrapperPass : public MachineFunctionPass {

arsenm wrote:

Rename to LiveRegMatrixWrapperLegacy to avoid PassPass 

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -57,15 +59,21 @@ class LiveRegMatrix : public MachineFunctionPass {
   unsigned RegMaskVirtReg = 0;
   BitVector RegMaskUsable;
 
-  // MachineFunctionPass boilerplate.
-  void getAnalysisUsage(AnalysisUsage &) const override;
-  bool runOnMachineFunction(MachineFunction &) override;
-  void releaseMemory() override;
+  LiveRegMatrix() = default;
+  void releaseMemory();
 
 public:
-  static char ID;
-
-  LiveRegMatrix();
+  LiveRegMatrix(LiveRegMatrix &&Other)
+  : TRI(Other.TRI), LIS(Other.LIS), VRM(Other.VRM), UserTag(Other.UserTag),
+Matrix(std::move(Other.Matrix)), Queries(std::move(Other.Queries)),
+RegMaskTag(Other.RegMaskTag), RegMaskVirtReg(Other.RegMaskVirtReg),
+RegMaskUsable(std::move(Other.RegMaskUsable)) {
+Other.TRI = nullptr;
+Other.LIS = nullptr;
+Other.VRM = nullptr;

arsenm wrote:

Shouldn't need to clear these from Other 

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -156,7 +156,7 @@ void initializeLiveDebugValuesPass(PassRegistry &);
 void initializeLiveDebugVariablesPass(PassRegistry &);
 void initializeLiveIntervalsWrapperPassPass(PassRegistry &);
 void initializeLiveRangeShrinkPass(PassRegistry &);
-void initializeLiveRegMatrixPass(PassRegistry &);
+void initializeLiveRegMatrixWrapperPassPass(PassRegistry &);

arsenm wrote:

Avoid PassPass 

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -97,6 +97,7 @@ LOOP_PASS("loop-term-fold", LoopTermFoldPass())
 // preferably fix the scavenger to not depend on them).
 MACHINE_FUNCTION_ANALYSIS("live-intervals", LiveIntervalsAnalysis())
 MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis())
+MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis())

arsenm wrote:

Alphabetize 

https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - 
-mcpu=tahiti %s | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_spill_to_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32

arsenm wrote:

Compact register numbers 

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_regs_strict
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def 
$exec, implicit-def $scc, implicit $exec
+; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, 
implicit $exec
+; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, 
implicit-def $scc, implicit $exec
+%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

arsenm wrote:

Use run-pass=none to compact the register numbers 

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - 
-mcpu=tahiti %s | FileCheck %s
+

arsenm wrote:

Why does this need to be split into a separate test file? 

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |

arsenm wrote:

This really needs checks for the MFI serialized allocated register. It is not 
yet serialized, but it really needs to be (and is currently causing issues for 
me) 

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke updated 
https://github.com/llvm/llvm-project/pull/109939

>From 3d8720930eaf0acd31c39722c98da085066ed315 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 25 Sep 2024 11:21:04 +
Subject: [PATCH 1/2] [AMDGPU] Add tests for SIPreAllocateWWMRegs

---
 .../AMDGPU/si-pre-allocate-wwm-regs.mir   | 26 +++
 .../si-pre-allocate-wwm-sgpr-spills.mir   | 21 +++
 2 files changed, 47 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir

diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir 
b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
new file mode 100644
index 00..f2db299f575f5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_regs_strict
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def 
$exec, implicit-def $scc, implicit $exec
+; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, 
implicit $exec
+; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, 
implicit-def $scc, implicit $exec
+%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+%25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 
15, 0, implicit $exec
+$exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+%2:vgpr_32 = COPY %0:vgpr_32
+...
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir 
b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
new file mode 100644
index 00..f0efe74878d831
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - 
-mcpu=tahiti %s | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_spill_to_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32
+%2:vgpr_32 = COPY %0:vgpr_32
+...
+

>From 0d0cd3fb0bdc41731c89492dbe34a1ebf939c52e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 24 Sep 2024 11:41:18 +
Subject: [PATCH 2/2] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  6 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  7 ++-
 .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp| 60 ---
 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h | 30 ++
 .../AMDGPU/si-pre-allocate-wwm-regs.mir   |  1 +
 .../si-pre-allocate-wwm-sgpr-spills.mir   |  1 +
 7 files changed, 79 insertions(+), 27 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b2dd354e496a2e..c0fd5e4625895a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -49,7 +49,7 @@ FunctionPass *createSIFixSGPRCopiesLegacyPass();
 FunctionPass *createLowerWWMCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIPreAllocateWWMRegsPass();
+FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
 FunctionPass *createSIFormMemoryClausesPass();
 
 FunctionPass *createSIPostRABundlerPass();
@@ -208,8 +208,8 @@ extern char &SILateBranchLoweringPassID;
 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingID;
 
-void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
-extern char &SIPreAllocateWWMRegsID;
+void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsLegacyID;
 

[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke edited 
https://github.com/llvm/llvm-project/pull/109939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke created 
https://github.com/llvm/llvm-project/pull/109963

None

>From 3d8720930eaf0acd31c39722c98da085066ed315 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 25 Sep 2024 11:21:04 +
Subject: [PATCH] [AMDGPU] Add tests for SIPreAllocateWWMRegs

---
 .../AMDGPU/si-pre-allocate-wwm-regs.mir   | 26 +++
 .../si-pre-allocate-wwm-sgpr-spills.mir   | 21 +++
 2 files changed, 47 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir

diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir 
b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
new file mode 100644
index 00..f2db299f575f5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-regs.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_regs_strict
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def 
$exec, implicit-def $scc, implicit $exec
+; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+; CHECK-NEXT: dead $vgpr0 = V_MOV_B32_dpp $vgpr0, [[DEF]], 323, 12, 15, 0, 
implicit $exec
+; CHECK-NEXT: $exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+renamable $sgpr4_sgpr5 = ENTER_STRICT_WWM -1, implicit-def $exec, 
implicit-def $scc, implicit $exec
+%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+%25:vgpr_32 = V_MOV_B32_dpp %24:vgpr_32(tied-def 0), %0:vgpr_32, 323, 12, 
15, 0, implicit $exec
+$exec = EXIT_STRICT_WWM killed renamable $sgpr4_sgpr5
+%2:vgpr_32 = COPY %0:vgpr_32
+...
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir 
b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
new file mode 100644
index 00..f0efe74878d831
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-allocate-wwm-sgpr-spills.mir
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-amdgpu-prealloc-sgpr-spill-vgprs -run-pass=si-pre-allocate-wwm-regs -o - 
-mcpu=tahiti %s | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_spill_to_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr1
+; CHECK-LABEL: name: pre_allocate_wwm_spill_to_vgpr
+; CHECK: liveins: $sgpr1
+; CHECK-NEXT: {{  $}}
+; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+; CHECK-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, [[DEF]]
+; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+%0:vgpr_32 = IMPLICIT_DEF
+%23:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 0, %0:vgpr_32
+%2:vgpr_32 = COPY %0:vgpr_32
+...
+

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

Akshat-Oke wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite:
> https://app.graphite.dev/github/pr/llvm/llvm-project/109963?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#109939** (Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/109939?utm_source=stack-comment-icon)
* **#109963** (Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/109963?utm_source=stack-comment-icon) 👈
* **#109938** (Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/109938?utm_source=stack-comment-icon)
* **#109937** (Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/109937?utm_source=stack-comment-icon)
* **#109936** (Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/109936?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/?utm_source=stack-comment

Join @Akshat-Oke and the rest of your teammates on Graphite:
https://graphite.dev?utm-source=stack-comment
  

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -254,3 +262,13 @@ bool 
SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
   rewriteRegs(MF);
   return true;
 }
+
+PreservedAnalyses
+SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
+  MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = &MFAM.getResult(MF);
+  auto *Matrix = &MFAM.getResult(MF);
+  auto *VRM = &MFAM.getResult(MF);

arsenm wrote:

I thought this should be using getCachedResult, and the pass supported no LIS 
for the fast RA path. But I see now the legacy path is requiring them (although 
it probably shouldn't?) 

https://github.com/llvm/llvm-project/pull/109939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][AMDGPU] Port SIPreAllocateWWMRegs to NPM (PR #109939)

2024-09-25 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,25 @@
+//===--- SIPreAllocateWWMRegs.h 
---===//

arsenm wrote:

Missing C++ mode comment 

https://github.com/llvm/llvm-project/pull/109939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NewPM][CodeGen] Port LiveRegMatrix to NPM (PR #109938)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke ready_for_review 
https://github.com/llvm/llvm-project/pull/109938
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke ready_for_review 
https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Update correct dependency (PR #109937)

2024-09-25 Thread Akshat Oke via llvm-branch-commits

https://github.com/Akshat-Oke ready_for_review 
https://github.com/llvm/llvm-project/pull/109937
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Adjust the version of __cpp_lib_ranges in C++20 mode (PR #109324)

2024-09-25 Thread Louis Dionne via llvm-branch-commits

ldionne wrote:

@tru Our CI is currently super unstable, which is what is causing the
failures. We've been without CI for roughly the past week. We're working on it.

https://github.com/llvm/llvm-project/pull/109324
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-25 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian updated 
https://github.com/llvm/llvm-project/pull/108258

>From f79d612a7335e7a150c2347638ea6a9e36bbc1ea Mon Sep 17 00:00:00 2001
From: Shilei Tian 
Date: Wed, 25 Sep 2024 14:42:09 -0400
Subject: [PATCH] [Attributor] Take the address space from addrspacecast
 directly

If the value to be analyzed is directly from addrspacecast, we take the source
address space directly. This is to improve the case where in
`AMDGPUPromoteKernelArgumentsPass`, the kernel argument is promoted by
inserting an addrspacecast directly from a generic pointer. However, during the
analysis, the underlying object will be the generic pointer, instead of the
addrspacecast, thus the inferred address space is the generic one, which is not
ideal.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 60 ++-
 llvm/test/CodeGen/AMDGPU/aa-as-infer.ll   | 35 +++
 2 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp 
b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 416dd09ca874bf..0cb2e5117741ad 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12596,16 +12596,37 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
   }
 
   ChangeStatus updateImpl(Attributor &A) override {
+assert(A.getInfoCache().getFlatAddressSpace().has_value());
+unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
 uint32_t OldAddressSpace = AssumedAddressSpace;
-auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
-DepClassTy::REQUIRED);
-auto Pred = [&](Value &Obj) {
+
+auto CheckAddressSpace = [&](Value &Obj) {
   if (isa(&Obj))
 return true;
+  // If an argument in flat address space only has addrspace cast uses, and
+  // those casts are same, then we take the dst addrspace.
+  if (auto *Arg = dyn_cast(&Obj)) {
+if (Arg->getType()->getPointerAddressSpace() == FlatAS) {
+  unsigned CastAddrSpace = FlatAS;
+  for (auto *U : Arg->users()) {
+auto *ASCI = dyn_cast(U);
+if (!ASCI)
+  return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+if (CastAddrSpace != FlatAS &&
+CastAddrSpace != ASCI->getDestAddressSpace())
+  return false;
+CastAddrSpace = ASCI->getDestAddressSpace();
+  }
+  if (CastAddrSpace != FlatAS)
+return takeAddressSpace(CastAddrSpace);
+}
+  }
   return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
 };
 
-if (!AUO->forallUnderlyingObjects(Pred))
+auto *AUO = A.getOrCreateAAFor(getIRPosition(), this,
+DepClassTy::REQUIRED);
+if (!AUO->forallUnderlyingObjects(CheckAddressSpace))
   return indicatePessimisticFixpoint();
 
 return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
@@ -12614,17 +12635,21 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
-if (getAddressSpace() == InvalidAddressSpace ||
-getAddressSpace() == getAssociatedType()->getPointerAddressSpace())
+unsigned NewAS = getAddressSpace();
+
+if (NewAS == InvalidAddressSpace ||
+NewAS == getAssociatedType()->getPointerAddressSpace())
   return ChangeStatus::UNCHANGED;
 
+unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
+
 Value *AssociatedValue = &getAssociatedValue();
-Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+Value *OriginalValue = peelAddrspacecast(AssociatedValue, FlatAS);
 
 PointerType *NewPtrTy =
-PointerType::get(getAssociatedType()->getContext(), getAddressSpace());
+PointerType::get(getAssociatedType()->getContext(), NewAS);
 bool UseOriginalValue =
-OriginalValue->getType()->getPointerAddressSpace() == 
getAddressSpace();
+OriginalValue->getType()->getPointerAddressSpace() == NewAS;
 
 bool Changed = false;
 
@@ -12684,12 +12709,19 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
 return AssumedAddressSpace == AS;
   }
 
-  static Value *peelAddrspacecast(Value *V) {
-if (auto *I = dyn_cast(V))
-  return peelAddrspacecast(I->getPointerOperand());
+  static Value *peelAddrspacecast(Value *V, unsigned FlatAS) {
+if (auto *I = dyn_cast(V)) {
+  assert(I->getSrcAddressSpace() != FlatAS &&
+ "there should not be flat AS -> non-flat AS");
+  return I->getPointerOperand();
+}
 if (auto *C = dyn_cast(V))
-  if (C->getOpcode() == Instruction::AddrSpaceCast)
-return peelAddrspacecast(C->getOperand(0));
+  if (C->getOpcode() == Instruction::AddrSpaceCast) {
+assert(C->getOperand(0)->getType()->getPointerAddressSpac

[llvm-branch-commits] [llvm] [Attributor] Take the address space from addrspacecast directly (PR #108258)

2024-09-25 Thread Shilei Tian via llvm-branch-commits

shiltian wrote:

I unstacked from https://github.com/llvm/llvm-project/pull/108786 to unblock 
this since the ticket needs to be fixed promptly.

https://github.com/llvm/llvm-project/pull/108258
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Add version checks for clauses (PR #110015)

2024-09-25 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/110015

>From 3c786ad2a50f146d357d882b0c1d966486f7295f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 24 Sep 2024 17:41:16 -0500
Subject: [PATCH 1/4] [flang][OpenMP] Add version checks for clauses

If there is a clause that is allowed on a given directive in a later
version of the OpenMP spec, report an error and provide the minimal
spec version that allows the clause.

The case where a clause is not allowed on a directive at all is already
handled elsewhere.
---
 flang/lib/Semantics/check-omp-structure.cpp   | 93 +--
 flang/lib/Semantics/check-omp-structure.h |  1 +
 flang/test/Lower/OpenMP/atomic-capture.f90|  4 +-
 flang/test/Lower/OpenMP/atomic-read.f90   |  2 +-
 flang/test/Lower/OpenMP/atomic-update.f90 |  4 +-
 flang/test/Lower/OpenMP/atomic-write.f90  |  2 +-
 .../test/Lower/OpenMP/declare-target-data.f90 |  4 +-
 .../declare-target-deferred-marking.f90   |  4 +-
 .../OpenMP/declare-target-func-and-subr.f90   |  4 +-
 ...arget-implicit-func-and-subr-cap-enter.f90 |  8 +-
 ...lare-target-implicit-func-and-subr-cap.f90 |  8 +-
 .../declare-target-implicit-tarop-cap.f90 |  8 +-
 .../Lower/OpenMP/function-filtering-2.f90 | 12 +--
 .../test/Lower/OpenMP/function-filtering.f90  | 12 +--
 .../OpenMP/declare_target-device_type.f90 |  4 +-
 .../Parser/OpenMP/in-reduction-clause.f90 |  4 +-
 flang/test/Parser/OpenMP/order-clause01.f90   |  4 +-
 flang/test/Parser/OpenMP/tile-size.f90|  4 +-
 flang/test/Parser/OpenMP/unroll-full.f90  |  4 +-
 flang/test/Parser/OpenMP/unroll.f90   |  4 +-
 .../Semantics/OpenMP/atomic-hint-clause.f90   |  2 +-
 flang/test/Semantics/OpenMP/atomic01.f90  |  2 +-
 flang/test/Semantics/OpenMP/atomic05.f90  |  2 +-
 .../Semantics/OpenMP/clause-validity01.f90|  2 +-
 .../OpenMP/declarative-directive.f90  |  2 +-
 .../Semantics/OpenMP/declare-target01.f90 |  2 +-
 .../Semantics/OpenMP/declare-target02.f90 |  2 +-
 .../Semantics/OpenMP/declare-target06.f90 |  2 +-
 .../Semantics/OpenMP/device-constructs.f90|  2 +-
 flang/test/Semantics/OpenMP/flush02.f90   |  2 +-
 flang/test/Semantics/OpenMP/if-clause.f90 |  2 +-
 flang/test/Semantics/OpenMP/nontemporal.f90   |  2 +-
 .../test/Semantics/OpenMP/order-clause01.f90  |  2 +-
 .../Semantics/OpenMP/requires-atomic01.f90|  2 +-
 .../Semantics/OpenMP/requires-atomic02.f90|  2 +-
 flang/test/Semantics/OpenMP/requires04.f90|  2 +-
 flang/test/Semantics/OpenMP/requires05.f90|  2 +-
 .../Semantics/OpenMP/simd-nontemporal.f90 |  2 +-
 flang/test/Semantics/OpenMP/target01.f90  |  2 +-
 flang/test/Semantics/OpenMP/taskgroup01.f90   |  2 +-
 .../test/Semantics/OpenMP/use_device_addr.f90 |  2 +-
 .../Semantics/OpenMP/use_device_addr1.f90 |  2 +-
 .../test/Semantics/OpenMP/use_device_ptr1.f90 |  2 +-
 43 files changed, 137 insertions(+), 99 deletions(-)

diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index dfc3f3290a81be..976c159e252f12 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -16,25 +16,25 @@ namespace Fortran::semantics {
 // Use when clause falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
   }
 
 #define CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresConstantPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 #define CHECK_REQ_SCALAR_INT_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::OmpClause::X &c) { \
-CheckAllowed(llvm::omp::Clause::Y); \
+CheckAllowedClause(llvm::omp::Clause::Y); \
 RequiresPositiveParameter(llvm::omp::Clause::Y, c.v); \
   }
 
 // Use when clause don't falls under 'struct OmpClause' in 'parse-tree.h'.
 #define CHECK_SIMPLE_PARSER_CLAUSE(X, Y) \
   void OmpStructureChecker::Enter(const parser::X &) { \
-CheckAllowed(llvm::omp::Y); \
+CheckAllowedClause(llvm::omp::Y); \
   }
 
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
@@ -163,6 +163,43 @@ class AssociatedLoopChecker {
   std::map constructNamesAndLevels_;
 };
 
+bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
+  unsigned version{context_.langOptions().OpenMPVersion};
+  DirectiveContext &dirCtx = GetContext();
+  llvm::omp::Directive dir{dirCtx.directive};
+
+  if (!llvm::omp::isAllowedClauseForDirective(dir, clause, version)) {
+unsigned allowedInVersion{[&] {
+  for (unsigned v : {45, 50, 51, 52, 60}) {
+if (v <= version) {
+  c